Fix get all inference endpoints not returning multiple endpoints sharing model deployment
elastic · dan-rubinstein · Feb 10, 2025 · Feb 5, 2025 · Feb 5, 2025 · Feb 6, 2025
commit 23175fcc7fcd904a28c60f45f03b35777e25d29b
diff --git a/...rg/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/...rg/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java
@@ -843,12 +843,20 @@ public void updateModelsWithDynamicFields(List<Model> models, ActionListener<Lis
             return;
         }
 
-        var modelsByDeploymentIds = new HashMap<String, ElasticsearchInternalModel>();
+        var modelsByDeploymentIds = new HashMap<String, List<ElasticsearchInternalModel>>();
         for (var model : models) {
             assert model instanceof ElasticsearchInternalModel;
 
             if (model instanceof ElasticsearchInternalModel esModel) {
-                modelsByDeploymentIds.put(esModel.mlNodeDeploymentId(), esModel);
+                if (modelsByDeploymentIds.containsKey(esModel.mlNodeDeploymentId()) == false) {
+                    modelsByDeploymentIds.put(esModel.mlNodeDeploymentId(), new ArrayList<>() {
+                        {
+                            add(esModel);
+                        }
+                    });
+                } else {
+                    modelsByDeploymentIds.get(esModel.mlNodeDeploymentId()).add(esModel);
+                }
             } else {
                 listener.onFailure(
                     new ElasticsearchStatusException(
@@ -867,10 +875,13 @@ public void updateModelsWithDynamicFields(List<Model> models, ActionListener<Lis
             new GetDeploymentStatsAction.Request(deploymentIds),
             ActionListener.wrap(stats -> {
                 for (var deploymentStats : stats.getStats().results()) {
-                    var model = modelsByDeploymentIds.get(deploymentStats.getDeploymentId());
-                    model.updateNumAllocations(deploymentStats.getNumberOfAllocations());
+                    var modelsForDeploymentId = modelsByDeploymentIds.get(deploymentStats.getDeploymentId());
+                    modelsForDeploymentId.forEach(model -> model.updateNumAllocations(deploymentStats.getNumberOfAllocations()));
                 }
-                listener.onResponse(new ArrayList<>(modelsByDeploymentIds.values()));
+                var updatedModels = new ArrayList<Model>();
+                modelsByDeploymentIds.values().forEach(updatedModels::addAll);
+
+                listener.onResponse(updatedModels);
             }, e -> {
                 logger.warn("Get deployment stats failed, cannot update the endpoint's number of allocations", e);
                 // continue with the original response

diff --git a/...asticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/...asticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java
@@ -46,12 +46,14 @@
 import org.elasticsearch.xpack.core.inference.results.ChunkedInferenceEmbeddingSparse;
 import org.elasticsearch.xpack.core.inference.results.ChunkedInferenceError;
 import org.elasticsearch.xpack.core.ml.MachineLearningField;
+import org.elasticsearch.xpack.core.ml.action.GetDeploymentStatsAction;
 import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction;
 import org.elasticsearch.xpack.core.ml.action.InferModelAction;
 import org.elasticsearch.xpack.core.ml.action.InferTrainedModelDeploymentAction;
 import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction;
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentStats;
 import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults;
 import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
 import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResultsTests;
@@ -72,8 +74,10 @@
 import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
@@ -101,6 +105,9 @@
 import static org.mockito.ArgumentMatchers.same;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
 import static org.mockito.Mockito.when;
 
 public class ElasticsearchInternalServiceTests extends ESTestCase {
@@ -1632,6 +1639,148 @@ public void testGetConfiguration() throws Exception {
         }
     }
 
+    // With no models to update, any response must be an empty list and no failure may be reported.
+    public void testUpdateModelsWithDynamicFields_NoModelsToUpdate() throws Exception {
+        ActionListener<List<Model>> emptyResultListener = ActionListener.wrap(
+            returnedModels -> assertEquals(Collections.emptyList(), returnedModels),
+            failure -> fail("Unexpected exception: " + failure)
+        );
+        try (var internalService = createService(mock(Client.class))) {
+            internalService.updateModelsWithDynamicFields(List.of(), emptyResultListener);
+        }
+    }
+
+    // A model that is not an ElasticsearchInternalModel must trip the service's assertion, not reach the listener.
+    public void testUpdateModelsWithDynamicFields_InvalidModelProvided() throws IOException {
+        var failMessage = "Expected invalid model assertion error to be thrown";
+        ActionListener<List<Model>> resultsListener = ActionListener.wrap(updatedModels -> fail(failMessage), e -> fail(failMessage));
+        try (var service = createService(mock(Client.class))) {
+            var error = assertThrows(
+                AssertionError.class,
+                () -> service.updateModelsWithDynamicFields(List.of(mock(Model.class)), resultsListener)
+            );
+            // fail() throws AssertionError too, so make sure the captured error did not originate from the listener
+            assertNotEquals(failMessage, error.getMessage());
+        }
+    }
+
+    // When the deployment stats request fails, the listener should still receive the model, with no allocation update.
+    @SuppressWarnings("unchecked")
+    public void testUpdateModelsWithDynamicFields_FailsToRetrieveDeployments() throws IOException {
+        var deploymentId = randomAlphaOfLength(10);
+        var model = mock(ElasticsearchInternalModel.class);
+        when(model.mlNodeDeploymentId()).thenReturn(deploymentId);
+        when(model.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING);
+
+        ActionListener<List<Model>> resultsListener = ActionListener.wrap(updatedModels -> {
+            assertEquals(1, updatedModels.size()); // expected value first, so a failure message reads correctly
+            verify(model, times(2)).mlNodeDeploymentId();
+            verifyNoMoreInteractions(model);
+        }, e -> fail("Expected original models to be returned"));
+
+        var client = mock(Client.class);
+        when(client.threadPool()).thenReturn(threadPool);
+        doAnswer(invocation -> {
+            var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocation.getArguments()[2];
+            listener.onFailure(new RuntimeException(randomAlphaOfLength(10)));
+            return null;
+        }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
+        try (var service = createService(client)) {
+            service.updateModelsWithDynamicFields(List.of(model), resultsListener);
+        }
+    }
+
+    // One endpoint backed by one deployment, checked through the shared helper.
+    public void testUpdateModelsWithDynamicFields_SingleModelToUpdate() throws IOException {
+        var singleDeploymentId = randomAlphaOfLength(10);
+
+        var endpointModel = mock(ElasticsearchInternalModel.class);
+        when(endpointModel.mlNodeDeploymentId()).thenReturn(singleDeploymentId);
+        when(endpointModel.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING);
+
+        Map<String, List<Model>> expectedGrouping = Map.of(singleDeploymentId, List.of(endpointModel));
+        testUpdateModelsWithDynamicFields(expectedGrouping);
+    }
+
+    // Two endpoints, each backed by its own deployment, checked through the shared helper.
+    public void testUpdateModelsWithDynamicFields_MultipleModelsWithDifferentDeploymentsToUpdate() throws IOException {
+        var firstDeploymentId = randomAlphaOfLength(10);
+        var firstModel = mock(ElasticsearchInternalModel.class);
+        when(firstModel.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING);
+        when(firstModel.mlNodeDeploymentId()).thenReturn(firstDeploymentId);
+        var secondDeploymentId = randomAlphaOfLength(10);
+        var secondModel = mock(ElasticsearchInternalModel.class);
+        when(secondModel.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING);
+        when(secondModel.mlNodeDeploymentId()).thenReturn(secondDeploymentId);
+
+        Map<String, List<Model>> grouping = new HashMap<>();
+        grouping.put(firstDeploymentId, List.of(firstModel));
+        grouping.put(secondDeploymentId, List.of(secondModel));
+        testUpdateModelsWithDynamicFields(grouping);
+    }
+
+    // Two endpoints sharing a single deployment id, checked through the shared helper.
+    public void testUpdateModelsWithDynamicFields_MultipleModelsWithSameDeploymentsToUpdate() throws IOException {
+        var sharedDeploymentId = randomAlphaOfLength(10);
+        var firstModel = mock(ElasticsearchInternalModel.class);
+        var secondModel = mock(ElasticsearchInternalModel.class);
+        for (var sharingModel : List.of(firstModel, secondModel)) {
+            when(sharingModel.mlNodeDeploymentId()).thenReturn(sharedDeploymentId);
+            when(sharingModel.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING);
+        }
+
+        Map<String, List<Model>> grouping = new HashMap<>();
+        grouping.put(sharedDeploymentId, List.of(firstModel, secondModel));
+        testUpdateModelsWithDynamicFields(grouping);
+    }
+
+    // Runs the update against stubbed deployment stats and verifies each model gets its deployment's allocation count.
+    @SuppressWarnings("unchecked")
+    private void testUpdateModelsWithDynamicFields(Map<String, List<Model>> modelsByDeploymentId) throws IOException {
+        var modelsToUpdate = new ArrayList<Model>();
+        modelsByDeploymentId.values().forEach(modelsToUpdate::addAll);
+        var updatedNumberOfAllocations = new HashMap<String, Integer>();
+        modelsByDeploymentId.keySet().forEach(deploymentId -> updatedNumberOfAllocations.put(deploymentId, randomIntBetween(1, 10)));
+
+        ActionListener<List<Model>> resultsListener = ActionListener.wrap(updatedModels -> {
+            assertEquals(modelsToUpdate.size(), updatedModels.size()); // expected value first, so a failure message reads correctly
+            modelsByDeploymentId.forEach((deploymentId, models) -> {
+                var expectedNumberOfAllocations = updatedNumberOfAllocations.get(deploymentId);
+                models.forEach(model -> {
+                    verify((ElasticsearchInternalModel) model).updateNumAllocations(expectedNumberOfAllocations);
+                    verify((ElasticsearchInternalModel) model, times(2)).mlNodeDeploymentId();
+                    verifyNoMoreInteractions(model);
+                });
+            });
+        }, e -> fail("Unexpected exception: " + e));
+
+        var client = mock(Client.class);
+        when(client.threadPool()).thenReturn(threadPool);
+        // Answer the stats request with one mocked AssignmentStats per deployment id, carrying the chosen allocation count
+        doAnswer(invocation -> {
+            var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocation.getArguments()[2];
+            var mockAssignmentStats = new ArrayList<AssignmentStats>();
+            modelsByDeploymentId.keySet().forEach(deploymentId -> {
+                var mockAssignmentStatsForDeploymentId = mock(AssignmentStats.class);
+                when(mockAssignmentStatsForDeploymentId.getDeploymentId()).thenReturn(deploymentId);
+                when(mockAssignmentStatsForDeploymentId.getNumberOfAllocations()).thenReturn(updatedNumberOfAllocations.get(deploymentId));
+                mockAssignmentStats.add(mockAssignmentStatsForDeploymentId);
+            });
+            listener.onResponse(
+                new GetDeploymentStatsAction.Response(
+                    Collections.emptyList(),
+                    Collections.emptyList(),
+                    mockAssignmentStats,
+                    mockAssignmentStats.size()
+                )
+            );
+            return null;
+        }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
+        try (var service = createService(client)) {
+            service.updateModelsWithDynamicFields(modelsToUpdate, resultsListener);
+        }
+    }
+
     private ElasticsearchInternalService createService(Client client) {
         var cs = mock(ClusterService.class);
         var cSettings = new ClusterSettings(Settings.EMPTY, Set.of(MachineLearningField.MAX_LAZY_ML_NODES));