-
Notifications
You must be signed in to change notification settings - Fork 25.2k
ES|QL: fix join masking eval #126614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ES|QL: fix join masking eval #126614
Changes from all commits
27865d7
0d35e21
08803dc
b820057
fc44c26
132c587
dcc9e18
fb205bd
3c1ccea
7d5d67b
812cbe8
5f3b001
f2f0a3d
ae267ac
6720f9b
1a23c32
ab77736
cf4719d
ecb72e9
d6fdb22
e6f51bb
8744884
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 126614 | ||
summary: Fix join masking eval | ||
area: ES|QL | ||
type: bug | ||
issues: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,13 +60,24 @@ | |
import org.elasticsearch.xpack.esql.parser.QueryParams; | ||
import org.elasticsearch.xpack.esql.plan.IndexPattern; | ||
import org.elasticsearch.xpack.esql.plan.logical.Aggregate; | ||
import org.elasticsearch.xpack.esql.plan.logical.Drop; | ||
import org.elasticsearch.xpack.esql.plan.logical.Enrich; | ||
import org.elasticsearch.xpack.esql.plan.logical.Eval; | ||
import org.elasticsearch.xpack.esql.plan.logical.Filter; | ||
import org.elasticsearch.xpack.esql.plan.logical.Fork; | ||
import org.elasticsearch.xpack.esql.plan.logical.InlineStats; | ||
import org.elasticsearch.xpack.esql.plan.logical.Insist; | ||
import org.elasticsearch.xpack.esql.plan.logical.Keep; | ||
import org.elasticsearch.xpack.esql.plan.logical.Limit; | ||
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; | ||
import org.elasticsearch.xpack.esql.plan.logical.MvExpand; | ||
import org.elasticsearch.xpack.esql.plan.logical.OrderBy; | ||
import org.elasticsearch.xpack.esql.plan.logical.Project; | ||
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; | ||
import org.elasticsearch.xpack.esql.plan.logical.Rename; | ||
import org.elasticsearch.xpack.esql.plan.logical.TopN; | ||
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation; | ||
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion; | ||
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan; | ||
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin; | ||
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes; | ||
|
@@ -500,6 +511,7 @@ private void preAnalyzeMainIndices( | |
|
||
/** | ||
* Check if there are any clusters to search. | ||
* | ||
* @return true if there are no clusters to search, false otherwise | ||
*/ | ||
private boolean allCCSClustersSkipped( | ||
|
@@ -612,6 +624,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy | |
var keepJoinRefsBuilder = AttributeSet.builder(); | ||
Set<String> wildcardJoinIndices = new java.util.HashSet<>(); | ||
|
||
boolean[] canRemoveAliases = new boolean[] { true }; | ||
|
||
parsed.forEachDown(p -> {// go over each plan top-down | ||
if (p instanceof RegexExtract re) { // for Grok and Dissect | ||
// remove other down-the-tree references to the extracted fields | ||
|
@@ -657,20 +671,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy | |
} | ||
} | ||
|
||
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree | ||
// for example "from test | eval x = salary | stats max = max(x) by gender" | ||
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" | ||
AttributeSet planRefs = p.references(); | ||
Set<String> fieldNames = planRefs.names(); | ||
p.forEachExpressionDown(Alias.class, alias -> { | ||
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id" | ||
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" | ||
if (fieldNames.contains(alias.name())) { | ||
return; | ||
} | ||
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr))); | ||
}); | ||
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of | ||
// command that we may add in the future which can override already defined Aliases with EVAL | ||
// (for example | ||
// | ||
// from test | ||
// | eval ip = 123 | ||
// | enrich ips_policy ON hostname | ||
// | rename ip AS my_ip | ||
// | ||
// and ips_policy enriches the results with the same name ip field), | ||
// these aliases should be kept in the list of fields. | ||
if (canRemoveAliases[0] && couldOverrideAliases(p)) { | ||
canRemoveAliases[0] = false; | ||
} | ||
if (canRemoveAliases[0]) { | ||
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree | ||
// for example "from test | eval x = salary | stats max = max(x) by gender" | ||
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" | ||
AttributeSet planRefs = p.references(); | ||
Set<String> fieldNames = planRefs.names(); | ||
p.forEachExpressionDown(Alias.class, alias -> { | ||
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id" | ||
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" | ||
if (fieldNames.contains(alias.name())) { | ||
return; | ||
} | ||
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr))); | ||
}); | ||
} | ||
}); | ||
|
||
// Add JOIN ON column references afterward to avoid Alias removal | ||
referencesBuilder.addAll(keepJoinRefsBuilder); | ||
// If any JOIN commands need wildcard field-caps calls, persist the index names | ||
|
@@ -694,6 +725,32 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy | |
} | ||
} | ||
|
||
/** | ||
* Could a plan "accidentally" override aliases? | ||
* Examples are JOIN and ENRICH, that _could_ produce fields with the same | ||
* name of an existing alias, based on their index mapping. | ||
* Here we just have to consider commands where this information is not available before index resolution, | ||
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know. | ||
*/ | ||
private static boolean couldOverrideAliases(LogicalPlan p) { | ||
return (p instanceof Aggregate | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you listing all of these that cannot define an additional "hidden" Attribute instead of checking those that can? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To avoid that it breaks when we add new commands that behave like JOIN. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we add a command that doesn't behave like JOIN, then it could break the other way around. Imo, it makes much more sense to list those commands that are "special". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is interesting, I'm not sure I have completely clear the implications of this aspect. If I got it right, we use these field names to limit the scope of the field_caps; if we add a field that does not exist in any of the involved indices, will field_caps fail? My understanding is that it will work fine, just ignoring the additional fields. If it wasn't the case, then we would be in a catch-22: we couldn't know which fields to send before validating the query, and we couldn't validate the query before sending the fields to field_caps. The fact that everything works fine makes me think that it's safe, but maybe I'm missing something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think Completion is safe (the target attribute is defined at parsing time), but I have to double-check. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep, I see your point. Adding more fields is safer. Added less fields is problematic. Better more fields than less. |
||
|| p instanceof Completion | ||
|| p instanceof Drop | ||
|| p instanceof Eval | ||
|| p instanceof Filter | ||
|| p instanceof Fork | ||
|| p instanceof InlineStats | ||
|| p instanceof Insist | ||
|| p instanceof Keep | ||
|| p instanceof Limit | ||
|| p instanceof MvExpand | ||
|| p instanceof OrderBy | ||
|| p instanceof Project | ||
|| p instanceof RegexExtract | ||
|| p instanceof Rename | ||
|| p instanceof TopN) == false; | ||
} | ||
|
||
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { | ||
boolean isPattern = Regex.isSimpleMatchPattern(attr.name()); | ||
if (skipIfPattern && isPattern) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please, add this test and others (be creative) to
IndexResolverFieldNamesTests
.