Skip to content

Commit de5be24

Browse files
committed
Do not recommend increasing max_shards_per_node (#120458)
Today, if the `shards_capacity` health indicator detects a problem, it recommends increasing the limit. This goes against the advice in the manual about not increasing these limits, and it also makes it rather pointless to have a limit in the first place. This commit improves the recommendation to suggest either adding nodes or reducing the shard count.
1 parent 7678eb3 commit de5be24

File tree

5 files changed: +46 -14 lines changed

docs/changelog/120458.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 120458
2+
summary: Do not recommend increasing `max_shards_per_node`
3+
area: Health
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/common/ReferenceDocs.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ public enum ReferenceDocs {
8383
JDK_LOCALE_DIFFERENCES,
8484
ALLOCATION_EXPLAIN_MAX_RETRY,
8585
SECURE_SETTINGS,
86+
CLUSTER_SHARD_LIMIT,
8687
// this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner
8788
;
8889

server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.cluster.metadata.Metadata;
1313
import org.elasticsearch.cluster.node.DiscoveryNodes;
1414
import org.elasticsearch.cluster.service.ClusterService;
15+
import org.elasticsearch.common.ReferenceDocs;
1516
import org.elasticsearch.common.TriFunction;
1617
import org.elasticsearch.common.settings.Setting;
1718
import org.elasticsearch.features.FeatureService;
@@ -55,21 +56,18 @@ public class ShardsCapacityHealthIndicatorService implements HealthIndicatorServ
5556
"The cluster is running low on room to add new shards. Adding data to new indices is at risk";
5657
private static final String INDEX_CREATION_RISK =
5758
"The cluster is running low on room to add new shards. Adding data to new indices might soon fail.";
58-
private static final String HELP_GUIDE = "https://ela.st/fix-shards-capacity";
5959
private static final TriFunction<String, Setting<?>, String, Diagnosis> SHARD_MAX_CAPACITY_REACHED_FN = (
6060
id,
6161
setting,
6262
indexType) -> new Diagnosis(
6363
new Diagnosis.Definition(
6464
NAME,
6565
id,
66-
"Elasticsearch is about to reach the maximum number of shards it can host, based on your current settings.",
67-
"Increase the value of ["
68-
+ setting.getKey()
69-
+ "] cluster setting or remove "
66+
"Elasticsearch is about to reach the maximum number of shards it can host as set by [" + setting.getKey() + "].",
67+
"Increase the number of nodes in your cluster or remove some "
7068
+ indexType
71-
+ " indices to clear up resources.",
72-
HELP_GUIDE
69+
+ " indices to reduce the number of shards in the cluster.",
70+
ReferenceDocs.CLUSTER_SHARD_LIMIT.toString()
7371
),
7472
null
7573
);
@@ -83,12 +81,12 @@ public class ShardsCapacityHealthIndicatorService implements HealthIndicatorServ
8381
new HealthIndicatorImpact(NAME, "creation_of_new_indices_at_risk", 2, INDEX_CREATION_RISK, List.of(ImpactArea.INGEST))
8482
);
8583
static final Diagnosis SHARDS_MAX_CAPACITY_REACHED_DATA_NODES = SHARD_MAX_CAPACITY_REACHED_FN.apply(
86-
"increase_max_shards_per_node",
84+
"decrease_shards_per_non_frozen_node",
8785
ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE,
88-
"data"
86+
"non-frozen"
8987
);
9088
static final Diagnosis SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES = SHARD_MAX_CAPACITY_REACHED_FN.apply(
91-
"increase_max_shards_per_node_frozen",
89+
"decrease_shards_per_frozen_node",
9290
ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN,
9391
"frozen"
9492
);

server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ FORMING_SINGLE_NODE_CLUSTERS modules-discover
4545
JDK_LOCALE_DIFFERENCES mapping-date-format.html#custom-date-format-locales
4646
ALLOCATION_EXPLAIN_MAX_RETRY cluster-allocation-explain.html#maximum-number-of-retries-exceeded
4747
SECURE_SETTINGS secure-settings.html
48+
CLUSTER_SHARD_LIMIT misc-cluster-settings.html#cluster-shard-limit

server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@
6262
import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP;
6363
import static org.elasticsearch.indices.ShardLimitValidator.INDEX_SETTING_SHARD_LIMIT_GROUP;
6464
import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP;
65+
import static org.elasticsearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE;
66+
import static org.elasticsearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN;
67+
import static org.hamcrest.Matchers.allOf;
68+
import static org.hamcrest.Matchers.containsString;
6569
import static org.hamcrest.Matchers.equalTo;
6670
import static org.hamcrest.Matchers.hasSize;
6771
import static org.hamcrest.Matchers.is;
@@ -93,7 +97,6 @@ public void setUp() throws Exception {
9397
.build();
9498

9599
clusterService = ClusterServiceUtils.createClusterService(threadPool);
96-
97100
featureService = Mockito.mock(FeatureService.class);
98101
Mockito.when(featureService.clusterHasFeature(any(), any())).thenReturn(true);
99102
}
@@ -159,6 +162,30 @@ public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException {
159162
);
160163
}
161164

165+
public void testDiagnoses() {
166+
assertEquals("shards_capacity", SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().indicatorName());
167+
assertEquals("decrease_shards_per_non_frozen_node", SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().id());
168+
assertThat(
169+
SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().cause(),
170+
allOf(containsString("maximum number of shards"), containsString(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()))
171+
);
172+
assertThat(
173+
SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().action(),
174+
allOf(containsString("Increase the number of nodes in your cluster"), containsString("remove some non-frozen indices"))
175+
);
176+
177+
assertEquals("shards_capacity", SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().indicatorName());
178+
assertEquals("decrease_shards_per_frozen_node", SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().id());
179+
assertThat(
180+
SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().cause(),
181+
allOf(containsString("maximum number of shards"), containsString(SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey()))
182+
);
183+
assertThat(
184+
SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().action(),
185+
allOf(containsString("Increase the number of nodes in your cluster"), containsString("remove some frozen indices"))
186+
);
187+
}
188+
162189
public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOException {
163190
{
164191
// Only data_nodes does not have enough space
@@ -378,11 +405,11 @@ public void testCalculateMethods() {
378405
public void testMappedFieldsForTelemetry() {
379406
assertEquals(ShardsCapacityHealthIndicatorService.NAME, "shards_capacity");
380407
assertEquals(
381-
"elasticsearch:health:shards_capacity:diagnosis:increase_max_shards_per_node",
408+
"elasticsearch:health:shards_capacity:diagnosis:decrease_shards_per_non_frozen_node",
382409
SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().getUniqueId()
383410
);
384411
assertEquals(
385-
"elasticsearch:health:shards_capacity:diagnosis:increase_max_shards_per_node_frozen",
412+
"elasticsearch:health:shards_capacity:diagnosis:decrease_shards_per_frozen_node",
386413
SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().getUniqueId()
387414
);
388415
}
@@ -442,7 +469,7 @@ private ClusterState createClusterState(
442469
var metadata = Metadata.builder()
443470
.persistentSettings(
444471
Settings.builder()
445-
.put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode)
472+
.put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode)
446473
.put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey(), maxShardsPerNodeFrozen)
447474
.build()
448475
);

0 commit comments

Comments
 (0)