diff --git a/docs/changelog/118114.yaml b/docs/changelog/118114.yaml new file mode 100644 index 0000000000000..1b7532d5df981 --- /dev/null +++ b/docs/changelog/118114.yaml @@ -0,0 +1,5 @@ +pr: 118114 +summary: Enable physical plan verification +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java index 53debedafc3d8..829943d245149 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java @@ -49,8 +49,9 @@ public Attribute(Source source, String name, Nullability nullability, @Nullable this.nullability = nullability; } - public static String rawTemporaryName(String inner, String outer, String suffix) { - return SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; + public static String rawTemporaryName(String... parts) { + var name = String.join("$", parts); + return name.isEmpty() || name.startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX) ? name : SYNTHETIC_ATTRIBUTE_NAME_PREFIX + name; } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 48bafd8eef00e..1eaade043658b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -57,7 +57,7 @@ protected List> batches() { } protected List> rules(boolean optimizeForEsSource) { - List> esSourceRules = new ArrayList<>(4); + List> esSourceRules = new ArrayList<>(6); esSourceRules.add(new ReplaceSourceAttributes()); if (optimizeForEsSource) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalVerifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalVerifier.java index 8bd8aba01fd21..20528f8dc2826 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalVerifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalVerifier.java @@ -8,9 +8,11 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.common.Failure; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.optimizer.rules.PlanConsistencyChecker; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; @@ -31,10 +33,14 @@ private PhysicalVerifier() {} /** Verifies the physical plan. */ public Collection verify(PhysicalPlan plan) { Set failures = new LinkedHashSet<>(); + Failures depFailures = new Failures(); plan.forEachDown(p -> { - // FIXME: re-enable - // DEPENDENCY_CHECK.checkPlan(p, failures); + if (p instanceof AggregateExec agg) { + var exclude = Expressions.references(agg.ordinalAttributes()); + DEPENDENCY_CHECK.checkPlan(p, exclude, depFailures); + return; + } if (p instanceof FieldExtractExec fieldExtractExec) { Attribute sourceAttribute = fieldExtractExec.sourceAttribute(); if (sourceAttribute == null) { @@ -48,8 +54,13 @@ public Collection verify(PhysicalPlan plan) { ); } } + DEPENDENCY_CHECK.checkPlan(p, depFailures); }); + if (depFailures.hasFailures()) { + throw new IllegalStateException(depFailures.toString()); + } + return failures; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PlanConsistencyChecker.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PlanConsistencyChecker.java index 30de8945a4c20..5101e3f73bfdf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PlanConsistencyChecker.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PlanConsistencyChecker.java @@ -26,9 +26,13 @@ public class PlanConsistencyChecker

> { * {@link org.elasticsearch.xpack.esql.common.Failure Failure}s to the {@link Failures} object. */ public void checkPlan(P p, Failures failures) { + checkPlan(p, AttributeSet.EMPTY, failures); + } + + public void checkPlan(P p, AttributeSet exclude, Failures failures) { AttributeSet refs = p.references(); AttributeSet input = p.inputSet(); - AttributeSet missing = refs.subtract(input); + AttributeSet missing = refs.subtract(input).subtract(exclude); // TODO: for Joins, we should probably check if the required fields from the left child are actually in the left child, not // just any child (and analogously for the right child). if (missing.isEmpty() == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index ed8851b64c27e..61b1554fb71bc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -11,7 +11,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; -import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; @@ -22,7 +21,6 @@ import java.util.ArrayList; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -54,18 +52,9 @@ public PhysicalPlan apply(PhysicalPlan plan) { * it loads the field lazily. If we have more than one field we need to * make sure the fields are loaded for the standard hash aggregator. */ - if (p instanceof AggregateExec agg && agg.groupings().size() == 1) { - // CATEGORIZE requires the standard hash aggregator as well. - if (agg.groupings().get(0).anyMatch(e -> e instanceof Categorize) == false) { - var leaves = new LinkedList<>(); - // TODO: this seems out of place - agg.aggregates() - .stream() - .filter(a -> agg.groupings().contains(a) == false) - .forEach(a -> leaves.addAll(a.collectLeaves())); - var remove = agg.groupings().stream().filter(g -> leaves.contains(g) == false).toList(); - missing.removeAll(Expressions.references(remove)); - } + if (p instanceof AggregateExec agg) { + var ordinalAttributes = agg.ordinalAttributes(); + missing.removeAll(Expressions.references(ordinalAttributes)); } // add extractor diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/AggregateExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/AggregateExec.java index 891d03c571b27..35f45250ed270 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/AggregateExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/AggregateExec.java @@ -18,10 +18,13 @@ import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -184,6 +187,24 @@ protected AttributeSet computeReferences() { return mode.isInputPartial() ? new AttributeSet(intermediateAttributes) : Aggregate.computeReferences(aggregates, groupings); } + /** Returns the attributes that can be loaded from ordinals -- no explicit extraction is needed */ + public List ordinalAttributes() { + List orginalAttributs = new ArrayList<>(groupings.size()); + // Ordinals can be leveraged just for a single grouping. If there are multiple groupings, fields need to be laoded for the + // hash aggregator. + // CATEGORIZE requires the standard hash aggregator as well. + if (groupings().size() == 1 && groupings.get(0).anyMatch(e -> e instanceof Categorize) == false) { + var leaves = new HashSet<>(); + aggregates.stream().filter(a -> groupings.contains(a) == false).forEach(a -> leaves.addAll(a.collectLeaves())); + groupings.forEach(g -> { + if (leaves.contains(g) == false) { + orginalAttributs.add((Attribute) g); + } + }); + } + return orginalAttributs; + } + @Override public int hashCode() { return Objects.hash(groupings, aggregates, mode, intermediateAttributes, estimatedRowSize, child()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java index 5530b3ea54d3d..d1d834b71047a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; @@ -72,6 +73,12 @@ public boolean inBetweenAggs() { return inBetweenAggs; } + @Override + protected AttributeSet computeReferences() { + // ExchangeExec does no input referencing, it only outputs all synthetic attributes, "sourced" from remote exchanges. + return AttributeSet.EMPTY; + } + @Override public UnaryExec replaceChild(PhysicalPlan newChild) { return new ExchangeExec(source(), output, inBetweenAggs, newChild); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index 35c6e4846bd88..ec996c5c84064 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -89,12 +89,7 @@ public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { @Override protected AttributeSet computeReferences() { - AttributeSet required = new AttributeSet(docValuesAttributes); - - required.add(sourceAttribute); - required.addAll(attributesToExtract); - - return required; + return sourceAttribute != null ? new AttributeSet(sourceAttribute) : AttributeSet.EMPTY; } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java index 8b1cc047309e7..26fd12447e664 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java @@ -93,9 +93,9 @@ public List addedFields() { public List output() { if (lazyOutput == null) { lazyOutput = new ArrayList<>(left().output()); - for (Attribute attr : addedFields) { - lazyOutput.add(attr); - } + var addedFieldsNames = addedFields.stream().map(Attribute::name).toList(); + lazyOutput.removeIf(a -> addedFieldsNames.contains(a.name())); + lazyOutput.addAll(addedFields); } return lazyOutput; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java index 35aba7665ec87..57ba1c8016feb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java @@ -297,9 +297,9 @@ private void aggregatesToFactory( // coordinator/exchange phase else if (mode == AggregatorMode.FINAL || mode == AggregatorMode.INTERMEDIATE) { if (grouping) { - sourceAttr = aggregateMapper.mapGrouping(aggregateFunction); + sourceAttr = aggregateMapper.mapGrouping(ne); } else { - sourceAttr = aggregateMapper.mapNonGrouping(aggregateFunction); + sourceAttr = aggregateMapper.mapNonGrouping(ne); } } else { throw new EsqlIllegalArgumentException("illegal aggregation mode"); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 18bbfdf485a81..1f55e293b8e75 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -13,6 +13,7 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; @@ -91,7 +92,7 @@ final class AggregateMapper { private record AggDef(Class aggClazz, String type, String extra, boolean grouping) {} /** Map of AggDef types to intermediate named expressions. */ - private static final Map> mapper = AGG_FUNCTIONS.stream() + private static final Map> MAPPER = AGG_FUNCTIONS.stream() .flatMap(AggregateMapper::typeAndNames) .flatMap(AggregateMapper::groupingAndNonGrouping) .collect(Collectors.toUnmodifiableMap(aggDef -> aggDef, AggregateMapper::lookupIntermediateState)); @@ -103,50 +104,57 @@ private record AggDef(Class aggClazz, String type, String extra, boolean grou cache = new HashMap<>(); } - public List mapNonGrouping(List aggregates) { + public List mapNonGrouping(List aggregates) { return doMapping(aggregates, false); } - public List mapNonGrouping(Expression aggregate) { + public List mapNonGrouping(NamedExpression aggregate) { return map(aggregate, false).toList(); } - public List mapGrouping(List aggregates) { + public List mapGrouping(List aggregates) { return doMapping(aggregates, true); } - private List doMapping(List aggregates, boolean grouping) { + private List doMapping(List aggregates, boolean grouping) { AttributeMap attrToExpressions = new AttributeMap<>(); - aggregates.stream().flatMap(agg -> map(agg, grouping)).forEach(ne -> attrToExpressions.put(ne.toAttribute(), ne)); + aggregates.stream().flatMap(ne -> map(ne, grouping)).forEach(ne -> attrToExpressions.put(ne.toAttribute(), ne)); return attrToExpressions.values().stream().toList(); } - public List mapGrouping(Expression aggregate) { + public List mapGrouping(NamedExpression aggregate) { return map(aggregate, true).toList(); } - private Stream map(Expression aggregate, boolean grouping) { - return cache.computeIfAbsent(Alias.unwrap(aggregate), aggKey -> computeEntryForAgg(aggKey, grouping)).stream(); + private Stream map(NamedExpression ne, boolean grouping) { + return cache.computeIfAbsent(Alias.unwrap(ne), aggKey -> computeEntryForAgg(ne.name(), aggKey, grouping)).stream(); } - private static List computeEntryForAgg(Expression aggregate, boolean grouping) { - var aggDef = aggDefOrNull(aggregate, grouping); - if (aggDef != null) { - var is = getNonNull(aggDef); - var exp = isToNE(is).toList(); - return exp; + private static List computeEntryForAgg(String aggAlias, Expression aggregate, boolean grouping) { + if (aggregate instanceof AggregateFunction aggregateFunction) { + return entryForAgg(aggAlias, aggregateFunction, grouping); } if (aggregate instanceof FieldAttribute || aggregate instanceof MetadataAttribute || aggregate instanceof ReferenceAttribute) { - // This condition is a little pedantic, but do we expected other expressions here? if so, then add them + // This condition is a little pedantic, but do we expect other expressions here? if so, then add them return List.of(); - } else { - throw new EsqlIllegalArgumentException("unknown agg: " + aggregate.getClass() + ": " + aggregate); } + throw new EsqlIllegalArgumentException("unknown agg: " + aggregate.getClass() + ": " + aggregate); + } + + private static List entryForAgg(String aggAlias, AggregateFunction aggregateFunction, boolean grouping) { + var aggDef = new AggDef( + aggregateFunction.getClass(), + dataTypeToString(aggregateFunction.field().dataType(), aggregateFunction.getClass()), + aggregateFunction instanceof SpatialCentroid ? "SourceValues" : "", + grouping + ); + var is = getNonNull(aggDef); + return isToNE(is, aggAlias).toList(); } /** Gets the agg from the mapper - wrapper around map::get for more informative failure.*/ private static List getNonNull(AggDef aggDef) { - var l = mapper.get(aggDef); + var l = MAPPER.get(aggDef); if (l == null) { throw new EsqlIllegalArgumentException("Cannot find intermediate state for: " + aggDef); } @@ -199,18 +207,6 @@ private static Stream groupingAndNonGrouping(Tuple, Tuple lookupIntermediateState(AggDef aggDef) { try { @@ -257,7 +253,7 @@ private static String determinePackageName(Class clazz) { } /** Maps intermediate state description to named expressions. */ - private static Stream isToNE(List intermediateStateDescs) { + private static Stream isToNE(List intermediateStateDescs, String aggAlias) { return intermediateStateDescs.stream().map(is -> { final DataType dataType; if (Strings.isEmpty(is.dataType())) { @@ -265,7 +261,7 @@ private static Stream isToNE(List interm } else { dataType = DataType.fromEs(is.dataType()); } - return new ReferenceAttribute(Source.EMPTY, is.name(), dataType); + return new ReferenceAttribute(Source.EMPTY, Attribute.rawTemporaryName(aggAlias, is.name()), dataType); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 6123a464378f1..879a413615202 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -255,7 +255,7 @@ public void testCountFieldWithEval() { var esStatsQuery = as(exg.child(), EsStatsQueryExec.class); assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + assertThat(Expressions.names(esStatsQuery.output()), contains("$$c$count", "$$c$seen")); var stat = as(esStatsQuery.stats().get(0), Stat.class); assertThat(stat.query(), is(QueryBuilders.existsQuery("salary"))); } @@ -276,7 +276,7 @@ public void testCountOneFieldWithFilter() { var exchange = as(agg.child(), ExchangeExec.class); var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + assertThat(Expressions.names(esStatsQuery.output()), contains("$$c$count", "$$c$seen")); var stat = as(esStatsQuery.stats().get(0), Stat.class); Source source = new Source(2, 8, "salary > 1000"); var exists = QueryBuilders.existsQuery("salary"); @@ -386,7 +386,7 @@ public void testAnotherCountAllWithFilter() { var exchange = as(agg.child(), ExchangeExec.class); var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + assertThat(Expressions.names(esStatsQuery.output()), contains("$$c$count", "$$c$seen")); var source = ((SingleValueQuery.Builder) esStatsQuery.query()).source(); var expected = wrapWithSingleQuery(query, QueryBuilders.rangeQuery("emp_no").gt(10010), "emp_no", source); assertThat(expected.toString(), is(esStatsQuery.query().toString())); @@ -997,7 +997,7 @@ public boolean exists(String field) { var exchange = as(agg.child(), ExchangeExec.class); assertThat(exchange.inBetweenAggs(), is(true)); var localSource = as(exchange.child(), LocalSourceExec.class); - assertThat(Expressions.names(localSource.output()), contains("count", "seen")); + assertThat(Expressions.names(localSource.output()), contains("$$c$count", "$$c$seen")); } /** @@ -1152,7 +1152,7 @@ public void testIsNotNull_TextField_Pushdown_WithCount() { var exg = as(agg.child(), ExchangeExec.class); var esStatsQuery = as(exg.child(), EsStatsQueryExec.class); assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + assertThat(Expressions.names(esStatsQuery.output()), contains("$$c$count", "$$c$seen")); var stat = as(esStatsQuery.stats().get(0), Stat.class); assertThat(stat.query(), is(QueryBuilders.existsQuery("job"))); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index c35f01e9fe774..9682bb1c8b076 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.compute.aggregation.AggregatorMode; import org.elasticsearch.core.Tuple; import org.elasticsearch.geometry.Circle; import org.elasticsearch.geometry.Polygon; @@ -127,6 +128,7 @@ import org.elasticsearch.xpack.esql.stats.SearchStats; import org.junit.Before; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -2286,6 +2288,58 @@ public void testFieldExtractWithoutSourceAttributes() { ); } + public void testVerifierOnMissingReferences() { + var plan = physicalPlan(""" + from test + | stats s = sum(salary) by emp_no + | where emp_no > 10 + """); + + plan = plan.transformUp( + AggregateExec.class, + a -> new AggregateExec( + a.source(), + a.child(), + a.groupings(), + List.of(), // remove the aggs (and thus the groupings) entirely + a.getMode(), + a.intermediateAttributes(), + a.estimatedRowSize() + ) + ); + final var finalPlan = plan; + var e = expectThrows(IllegalStateException.class, () -> physicalPlanOptimizer.verify(finalPlan)); + assertThat(e.getMessage(), containsString(" > 10[INTEGER]]] optimized incorrectly due to missing references [emp_no{f}#")); + } + + public void testVerifierOnDuplicateOutputAttributes() { + var plan = physicalPlan(""" + from test + | stats s = sum(salary) by emp_no + | where emp_no > 10 + """); + + plan = plan.transformUp(AggregateExec.class, a -> { + List intermediates = new ArrayList<>(a.intermediateAttributes()); + intermediates.add(intermediates.get(0)); + return new AggregateExec( + a.source(), + a.child(), + a.groupings(), + a.aggregates(), + AggregatorMode.INTERMEDIATE, // FINAL would deduplicate aggregates() + intermediates, + a.estimatedRowSize() + ); + }); + final var finalPlan = plan; + var e = expectThrows(IllegalStateException.class, () -> physicalPlanOptimizer.verify(finalPlan)); + assertThat( + e.getMessage(), + containsString("Plan [LimitExec[1000[INTEGER]]] optimized incorrectly due to duplicate output attribute emp_no{f}#") + ); + } + public void testProjectAwayColumns() { var rule = new ProjectAwayColumns(); @@ -2557,7 +2611,7 @@ public boolean exists(String field) { var exchange = asRemoteExchange(aggregate.child()); var localSourceExec = as(exchange.child(), LocalSourceExec.class); - assertThat(Expressions.names(localSourceExec.output()), contains("languages", "min", "seen")); + assertThat(Expressions.names(localSourceExec.output()), contains("languages", "$$m$min", "$$m$seen")); } /** @@ -2593,9 +2647,9 @@ public void testPartialAggFoldingOutput() { var limit = as(optimized, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); var exchange = as(agg.child(), ExchangeExec.class); - assertThat(Expressions.names(exchange.output()), contains("count", "seen")); + assertThat(Expressions.names(exchange.output()), contains("$$c$count", "$$c$seen")); var source = as(exchange.child(), LocalSourceExec.class); - assertThat(Expressions.names(source.output()), contains("count", "seen")); + assertThat(Expressions.names(source.output()), contains("$$c$count", "$$c$seen")); } /** @@ -2627,7 +2681,7 @@ public void testGlobalAggFoldingOutput() { var aggFinal = as(limit.child(), AggregateExec.class); var aggPartial = as(aggFinal.child(), AggregateExec.class); // The partial aggregation's output is determined via AbstractPhysicalOperationProviders.intermediateAttributes() - assertThat(Expressions.names(aggPartial.output()), contains("count", "seen")); + assertThat(Expressions.names(aggPartial.output()), contains("$$c$count", "$$c$seen")); limit = as(aggPartial.child(), LimitExec.class); var exchange = as(limit.child(), ExchangeExec.class); var project = as(exchange.child(), ProjectExec.class); @@ -2665,9 +2719,15 @@ public void testPartialAggFoldingOutputForSyntheticAgg() { var aggFinal = as(limit.child(), AggregateExec.class); assertThat(aggFinal.output(), hasSize(2)); var exchange = as(aggFinal.child(), ExchangeExec.class); - assertThat(Expressions.names(exchange.output()), contains("sum", "seen", "count", "seen")); + assertThat( + Expressions.names(exchange.output()), + contains("$$SUM$a$0$sum", "$$SUM$a$0$seen", "$$COUNT$a$1$count", "$$COUNT$a$1$seen") + ); var source = as(exchange.child(), LocalSourceExec.class); - assertThat(Expressions.names(source.output()), contains("sum", "seen", "count", "seen")); + assertThat( + Expressions.names(source.output()), + contains("$$SUM$a$0$sum", "$$SUM$a$0$seen", "$$COUNT$a$1$count", "$$COUNT$a$1$seen") + ); } /**