diff --git a/docs/changelog/126614.yaml b/docs/changelog/126614.yaml new file mode 100644 index 0000000000000..e8424c8c78245 --- /dev/null +++ b/docs/changelog/126614.yaml @@ -0,0 +1,5 @@ +pr: 126614 +summary: Fix join masking eval +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index f609fc2bec403..8e37c7aaadfbc 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -55,8 +55,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase { "Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026 "optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781 "No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418 - "JOIN left field .* is incompatible with right field", // https://github.com/elastic/elasticsearch/issues/126419 - "Unsupported type .* for enrich", // most likely still https://github.com/elastic/elasticsearch/issues/126419 "The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework ); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index dc9b7e445a35a..bc9d820847710 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1586,3 +1586,45 @@ from * salary_change.long:double|foo:long 5.0 |1698069301543123456 ; + + +joinMaskingEval +required_capability: join_lookup_v12 +required_capability: fix_join_masking_eval +from languag* +| eval type = null +| rename language_name as message +| lookup join message_types_lookup on message +| rename type as message +| lookup join message_types_lookup on message +| keep `language.name` +; + +ignoreOrder:true +language.name:text +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +null +English +French +Spanish +German +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 8695186cad70b..c6be321900367 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1005,7 +1005,13 @@ public enum Cap { /** * Support loading of ip fields if they are not indexed. */ - LOADING_NON_INDEXED_IP_FIELDS; + LOADING_NON_INDEXED_IP_FIELDS, + + /** + * During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values + * https://github.com/elastic/elasticsearch/issues/126419 + */ + FIX_JOIN_MASKING_EVAL; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 9330df751c00d..d53d2cdf7e3ee 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -60,13 +60,24 @@ import org.elasticsearch.xpack.esql.parser.QueryParams; import org.elasticsearch.xpack.esql.plan.IndexPattern; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.Filter; import org.elasticsearch.xpack.esql.plan.logical.Fork; +import org.elasticsearch.xpack.esql.plan.logical.InlineStats; +import org.elasticsearch.xpack.esql.plan.logical.Insist; import org.elasticsearch.xpack.esql.plan.logical.Keep; +import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.MvExpand; +import org.elasticsearch.xpack.esql.plan.logical.OrderBy; import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; +import org.elasticsearch.xpack.esql.plan.logical.Rename; +import org.elasticsearch.xpack.esql.plan.logical.TopN; import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation; +import org.elasticsearch.xpack.esql.plan.logical.inference.Completion; import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan; import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin; import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes; @@ -500,6 +511,7 @@ private void preAnalyzeMainIndices( /** * Check if there are any clusters to search. + * * @return true if there are no clusters to search, false otherwise */ private boolean allCCSClustersSkipped( @@ -612,6 +624,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy var keepJoinRefsBuilder = AttributeSet.builder(); Set wildcardJoinIndices = new java.util.HashSet<>(); + boolean[] canRemoveAliases = new boolean[] { true }; + parsed.forEachDown(p -> {// go over each plan top-down if (p instanceof RegexExtract re) { // for Grok and Dissect // remove other down-the-tree references to the extracted fields @@ -657,20 +671,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } } - // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree - // for example "from test | eval x = salary | stats max = max(x) by gender" - // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" - AttributeSet planRefs = p.references(); - Set fieldNames = planRefs.names(); - p.forEachExpressionDown(Alias.class, alias -> { - // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id" - // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" - if (fieldNames.contains(alias.name())) { - return; - } - referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr))); - }); + // If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of + // command that we may add in the future which can override already defined Aliases with EVAL + // (for example + // + // from test + // | eval ip = 123 + // | enrich ips_policy ON hostname + // | rename ip AS my_ip + // + // and ips_policy enriches the results with the same name ip field), + // these aliases should be kept in the list of fields. + if (canRemoveAliases[0] && couldOverrideAliases(p)) { + canRemoveAliases[0] = false; + } + if (canRemoveAliases[0]) { + // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree + // for example "from test | eval x = salary | stats max = max(x) by gender" + // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" + AttributeSet planRefs = p.references(); + Set fieldNames = planRefs.names(); + p.forEachExpressionDown(Alias.class, alias -> { + // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id" + // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" + if (fieldNames.contains(alias.name())) { + return; + } + referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr))); + }); + } }); + // Add JOIN ON column references afterward to avoid Alias removal referencesBuilder.addAll(keepJoinRefsBuilder); // If any JOIN commands need wildcard field-caps calls, persist the index names @@ -694,6 +725,32 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } } + /** + * Could a plan "accidentally" override aliases? + * Examples are JOIN and ENRICH, that _could_ produce fields with the same + * name of an existing alias, based on their index mapping. + * Here we just have to consider commands where this information is not available before index resolution, + * eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know. + */ + private static boolean couldOverrideAliases(LogicalPlan p) { + return (p instanceof Aggregate + || p instanceof Completion + || p instanceof Drop + || p instanceof Eval + || p instanceof Filter + || p instanceof Fork + || p instanceof InlineStats + || p instanceof Insist + || p instanceof Keep + || p instanceof Limit + || p instanceof MvExpand + || p instanceof OrderBy + || p instanceof Project + || p instanceof RegexExtract + || p instanceof Rename + || p instanceof TopN) == false; + } + private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { boolean isPattern = Regex.isSimpleMatchPattern(attr.name()); if (skipIfPattern && isPattern) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 028a59992b5bc..ef54dec2a0b60 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -478,13 +478,16 @@ public void testDropAllColumns_WithStats() { } public void testEnrichOn() { - assertFieldNames(""" - from employees - | sort emp_no - | limit 1 - | eval x = to_string(languages) - | enrich languages_policy on x - | keep emp_no, language_name""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")); + assertFieldNames( + """ + from employees + | sort emp_no + | limit 1 + | eval x = to_string(languages) + | enrich languages_policy on x + | keep emp_no, language_name""", + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") + ); } public void testEnrichOn2() { @@ -494,7 +497,7 @@ public void testEnrichOn2() { | enrich languages_policy on x | keep emp_no, language_name | sort emp_no - | limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")); + | limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")); } public void testUselessEnrich() { @@ -512,7 +515,7 @@ public void testSimpleSortLimit() { | enrich languages_policy on x | keep emp_no, language_name | sort emp_no - | limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*")); + | limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*")); } public void testWith() { @@ -520,7 +523,7 @@ public void testWith() { """ from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 1 | enrich languages_policy on x with language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -529,7 +532,7 @@ public void testWithAlias() { """ from employees | sort emp_no | limit 3 | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with lang = language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -538,7 +541,7 @@ public void testWithAliasSort() { """ from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3 | enrich languages_policy on x with lang = language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -547,7 +550,7 @@ public void testWithAliasAndPlain() { """ from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with lang = language_name, language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -556,7 +559,7 @@ public void testWithTwoAliasesSameProp() { """ from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with lang = language_name, lang2 = language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -565,7 +568,7 @@ public void testRedundantWith() { """ from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with language_name, language_name""", - Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*") + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*") ); } @@ -588,28 +591,34 @@ public void testConstantNullInput() { | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with language_name, language_name""", - Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*") + Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*") ); } public void testEnrichEval() { - assertFieldNames(""" - from employees - | eval x = to_string(languages) - | enrich languages_policy on x with lang = language_name - | eval language = concat(x, "-", lang) - | keep emp_no, x, lang, language - | sort emp_no desc | limit 3""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*")); + assertFieldNames( + """ + from employees + | eval x = to_string(languages) + | enrich languages_policy on x with lang = language_name + | eval language = concat(x, "-", lang) + | keep emp_no, x, lang, language + | sort emp_no desc | limit 3""", + Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*") + ); } public void testSimple() { - assertFieldNames(""" - from employees - | eval x = 1, y = to_string(languages) - | enrich languages_policy on y - | where x > 1 - | keep emp_no, language_name - | limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")); + assertFieldNames( + """ + from employees + | eval x = 1, y = to_string(languages) + | enrich languages_policy on y + | where x > 1 + | keep emp_no, language_name + | limit 1""", + Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "y", "x.*", "y.*") + ); } public void testEvalNullSort() { @@ -1653,6 +1662,54 @@ public void testInsist_multiFieldMappedMultiIndex() { ); } + public void testJoinMaskingKeep() { + assertFieldNames( + """ + from languag* + | eval type = null + | rename language_name as message + | lookup join message_types_lookup on message + | rename type as message + | lookup join message_types_lookup on message + | keep `language.name`""", + Set.of("language.name", "type", "language_name", "message", "language_name.*", "message.*", "type.*", "language.name.*") + ); + } + + public void testJoinMaskingKeep2() { + assertFieldNames(""" + from languag* + | eval type = "foo" + | rename type as message + | lookup join message_types_lookup on message + | rename type as message + | lookup join message_types_lookup on message + | keep `language.name`""", Set.of("language.name", "type", "message", "message.*", "type.*", "language.name.*")); + } + + public void testEnrichMaskingEvalOn() { + assertFieldNames(""" + from employees + | eval language_name = null + | enrich languages_policy on languages + | rename language_name as languages + | eval languages = length(languages) + | enrich languages_policy on languages + | keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*")); + } + + public void testEnrichAndJoinMaskingEvalWh() { + assertFieldNames(""" + from employees + | eval language_name = null + | enrich languages_policy on languages + | rename language_name as languages + | eval languages = length(languages) + | enrich languages_policy on languages + | lookup join message_types_lookup on language_name + | keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*")); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();