Skip to content

Commit 01889f1

Browse files
HDDS-12031. Enable Ratis by default on an upgraded cluster during SCM start-up. (apache#7831)
1 parent 819ed25 commit 01889f1

File tree

4 files changed

+68
-38
lines changed

4 files changed

+68
-38
lines changed

hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/upgrade/UpgradeTestUtils.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
*/
1919
package org.apache.hadoop.ozone.upgrade;
2020

21+
import jakarta.annotation.Nullable;
2122
import org.apache.hadoop.ozone.upgrade.InjectedUpgradeFinalizationExecutor.UpgradeTestInjectionPoints;
2223
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
2324
import org.apache.hadoop.ozone.common.StorageInfo;
2425
import org.slf4j.Logger;
2526

2627
import java.io.File;
2728
import java.io.IOException;
29+
import java.util.Properties;
2830
import java.util.UUID;
2931
import java.util.concurrent.CountDownLatch;
3032

@@ -40,6 +42,12 @@ private UpgradeTestUtils() { }
4042
*/
4143
public static File createVersionFile(File parentDir,
4244
HddsProtos.NodeType nodeType, int mlv) throws IOException {
45+
// Convenience overload: delegate to the four-argument variant, passing
// null so that no extra properties are added to the version file.
return createVersionFile(parentDir, nodeType, mlv, null);
46+
}
47+
48+
public static File createVersionFile(File parentDir,
49+
HddsProtos.NodeType nodeType, int mlv,
50+
@Nullable Properties properties) throws IOException {
4351

4452
final String versionFileName = "VERSION";
4553

@@ -49,6 +57,11 @@ public static File createVersionFile(File parentDir,
4957
System.currentTimeMillis(),
5058
mlv);
5159

60+
if (properties != null) {
61+
properties.forEach((key, value) ->
62+
info.setProperty((String) key, (String) value));
63+
}
64+
5265
File versionFile = new File(parentDir, versionFileName);
5366
info.writeTo(versionFile);
5467

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
251251
private PipelineManager pipelineManager;
252252
private ContainerManager containerManager;
253253
private BlockManager scmBlockManager;
254-
private final SCMStorageConfig scmStorageConfig;
254+
private SCMStorageConfig scmStorageConfig;
255255
private NodeDecommissionManager scmDecommissionManager;
256256
private WritableContainerFactory writableContainerFactory;
257257
private FinalizationManager finalizationManager;
@@ -383,6 +383,14 @@ private StorageContainerManager(OzoneConfiguration conf,
383383
"failure.", ResultCodes.SCM_NOT_INITIALIZED);
384384
}
385385

386+
// Initialize Ratis if needed.
387+
// This is for the clusters which got upgraded from an older version of Ozone.
388+
// We enable Ratis by default.
389+
if (!scmStorageConfig.isSCMHAEnabled()) {
390+
// Since we have initialized Ratis, we have to reload StorageConfig
391+
scmStorageConfig = initializeRatis(conf);
392+
}
393+
386394
threadNamePrefix = getScmNodeDetails().threadNamePrefix();
387395
primaryScmNodeId = scmStorageConfig.getPrimaryScmNodeId();
388396

@@ -1253,15 +1261,13 @@ public static boolean scmInit(OzoneConfiguration conf,
12531261
StorageState state = scmStorageConfig.getState();
12541262
final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf,
12551263
scmStorageConfig);
1256-
String primordialSCM = SCMHAUtils.getPrimordialSCM(conf);
1264+
final String primordialSCM = SCMHAUtils.getPrimordialSCM(conf);
12571265
final String selfNodeId = haDetails.getLocalNodeDetails().getNodeId();
12581266
final String selfHostName = haDetails.getLocalNodeDetails().getHostName();
1259-
if (primordialSCM != null && SCMHAUtils.isSCMHAEnabled(conf)
1260-
&& !SCMHAUtils.isPrimordialSCM(conf, selfNodeId, selfHostName)) {
1261-
LOG.info(
1262-
"SCM init command can only be executed in Primordial SCM {}, "
1263-
+ "self id {} "
1264-
+ "Ignoring it.", primordialSCM, selfNodeId);
1267+
if (primordialSCM != null &&
1268+
!SCMHAUtils.isPrimordialSCM(conf, selfNodeId, selfHostName)) {
1269+
LOG.info("SCM init command can only be executed on Primordial SCM. " +
1270+
"Primordial SCM ID: {}. Self ID: {}.", primordialSCM, selfNodeId);
12651271
return true;
12661272
}
12671273
if (state != StorageState.INITIALIZED) {
@@ -1291,16 +1297,7 @@ public static boolean scmInit(OzoneConfiguration conf,
12911297

12921298
scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
12931299
scmStorageConfig.initialize();
1294-
1295-
if (SCMHAUtils.isSCMHAEnabled(conf)) {
1296-
SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(),
1297-
scmStorageConfig.getScmId(), haDetails.getLocalNodeDetails(),
1298-
conf);
1299-
scmStorageConfig = new SCMStorageConfig(conf);
1300-
scmStorageConfig.setSCMHAFlag(true);
1301-
// Do force initialize to persist SCM_HA flag.
1302-
scmStorageConfig.forceInitialize();
1303-
}
1300+
scmStorageConfig = initializeRatis(conf);
13041301

13051302
LOG.info("SCM initialization succeeded. Current cluster id for sd={}"
13061303
+ "; cid={}; layoutVersion={}; scmId={}",
@@ -1312,26 +1309,19 @@ public static boolean scmInit(OzoneConfiguration conf,
13121309
return false;
13131310
}
13141311
} else {
1315-
clusterId = scmStorageConfig.getClusterID();
1316-
final boolean isSCMHAEnabled = scmStorageConfig.isSCMHAEnabled();
13171312

13181313
// Initialize security if security is enabled later.
13191314
initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, true);
13201315

1321-
if (SCMHAUtils.isSCMHAEnabled(conf) && !isSCMHAEnabled) {
1322-
SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(),
1323-
scmStorageConfig.getScmId(), haDetails.getLocalNodeDetails(),
1324-
conf);
1325-
scmStorageConfig.setSCMHAFlag(true);
1326-
scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
1327-
scmStorageConfig.forceInitialize();
1316+
// Enable Ratis if it's not already enabled.
1317+
if (!scmStorageConfig.isSCMHAEnabled()) {
1318+
scmStorageConfig = initializeRatis(conf);
13281319

13291320
/*
1330-
* Since Ratis is initialized on an existing cluster, we have to
1321+
* Since Ratis can be initialized on an existing cluster, we have to
13311322
* trigger Ratis snapshot so that this SCM can send the latest scm.db
13321323
* to the bootstrapping SCMs later.
13331324
*/
1334-
13351325
try {
13361326
SCMHAUtils.setRatisEnabled(true);
13371327
StorageContainerManager scm = createSCM(conf);
@@ -1342,18 +1332,29 @@ public static boolean scmInit(OzoneConfiguration conf,
13421332
} catch (AuthenticationException e) {
13431333
throw new IOException(e);
13441334
}
1345-
LOG.info("Enabled SCM HA");
13461335
}
13471336

13481337
LOG.info("SCM already initialized. Reusing existing cluster id for sd={}"
13491338
+ ";cid={}; layoutVersion={}; HAEnabled={}",
1350-
scmStorageConfig.getStorageDir(), clusterId,
1351-
scmStorageConfig.getLayoutVersion(),
1352-
scmStorageConfig.isSCMHAEnabled());
1339+
scmStorageConfig.getStorageDir(), scmStorageConfig.getClusterID(),
1340+
scmStorageConfig.getLayoutVersion(), scmStorageConfig.isSCMHAEnabled());
13531341
return true;
13541342
}
13551343
}
13561344

1345+
/**
 * Initializes Ratis for this SCM and marks SCM HA as enabled in the
 * storage config.
 *
 * A fresh {@link SCMStorageConfig} is loaded from {@code conf}, the Ratis
 * server is initialized with that config's cluster ID and SCM ID, and the
 * config is then updated (HA flag set, primary SCM node ID set to this
 * SCM's own ID) and force-initialized.
 *
 * @param conf configuration used to load the storage config and the SCM HA
 *             node details
 * @return the newly loaded storage config with the HA flag set; callers
 *         replace their own reference with it
 * @throws IOException declared by this method; which of the calls below
 *         raise it is not visible here
 */
private static SCMStorageConfig initializeRatis(OzoneConfiguration conf)
1346+
throws IOException {
1347+
// Load a fresh view of the storage config rather than reusing a caller's.
final SCMStorageConfig storageConfig = new SCMStorageConfig(conf);
1348+
final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf, storageConfig);
1349+
SCMRatisServerImpl.initialize(storageConfig.getClusterID(),
1350+
storageConfig.getScmId(), haDetails.getLocalNodeDetails(), conf);
1351+
storageConfig.setSCMHAFlag(true);
1352+
// This SCM becomes the primary: its own SCM ID is recorded as primary node ID.
storageConfig.setPrimaryScmNodeId(storageConfig.getScmId());
1353+
// Force initialize, presumably so the updated HA flag and primary node ID are
// persisted even though the storage is already initialized - TODO confirm.
storageConfig.forceInitialize();
1354+
LOG.info("Enabled Ratis!");
1355+
return storageConfig;
1356+
}
1357+
13571358
private static InetSocketAddress getScmAddress(SCMHANodeDetails haDetails,
13581359
ConfigurationSource conf) throws IOException {
13591360
List<SCMNodeInfo> scmNodeInfoList = SCMNodeInfo.buildNodeInfo(

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hdds.scm.upgrade;
1919

20+
import org.apache.hadoop.hdds.HddsConfigKeys;
2021
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
2122
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
2223
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -30,7 +31,10 @@
3031
import java.io.File;
3132
import java.io.IOException;
3233
import java.nio.file.Path;
34+
import java.util.Properties;
3335

36+
import static org.apache.hadoop.ozone.OzoneConsts.SCM_HA;
37+
import static org.apache.hadoop.ozone.OzoneConsts.SCM_ID;
3438
import static org.junit.jupiter.api.Assertions.assertEquals;
3539
import static org.junit.jupiter.api.Assertions.assertThrows;
3640
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -53,6 +57,8 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir)
5357
OzoneConfiguration conf = new OzoneConfiguration();
5458
conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS,
5559
tempDir.toAbsolutePath().toString());
60+
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
61+
tempDir.toAbsolutePath().toString());
5662

5763
// Set metadata layout version larger than software layout version.
5864
int largestSlv = 0;
@@ -61,9 +67,15 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir)
6167
}
6268
int mlv = largestSlv + 1;
6369

70+
Properties properties = new Properties();
71+
properties.setProperty(SCM_ID, "scm");
72+
properties.setProperty(SCM_HA, "true");
73+
6474
// Create version file with MLV > SLV, which should fail the SCM
6575
// construction.
66-
UpgradeTestUtils.createVersionFile(scmSubdir, HddsProtos.NodeType.SCM, mlv);
76+
UpgradeTestUtils.createVersionFile(scmSubdir, HddsProtos.NodeType.SCM, mlv,
77+
properties);
78+
6779

6880
Throwable t = assertThrows(IOException.class,
6981
() -> new StorageContainerManager(conf));

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ protected OMHAService createOMService() throws IOException,
538538
}
539539

540540
/**
541-
* Start OM service with multiple OMs.
541+
* Start SCM service with multiple SCMs.
542542
*/
543543
protected SCMHAService createSCMService()
544544
throws IOException, AuthenticationException {
@@ -616,10 +616,10 @@ protected SCMHAService createSCMService()
616616
* Initialize HA related configurations.
617617
*/
618618
private void initSCMHAConfig() {
619-
// Set configurations required for starting OM HA service, because that
619+
// Set configurations required for starting SCM HA service, because that
620620
// is the serviceID being passed to start Ozone HA cluster.
621-
// Here setting internal service and OZONE_OM_SERVICE_IDS_KEY, in this
622-
// way in OM start it uses internal service id to find it's service id.
621+
// Here setting internal service and OZONE_SCM_SERVICE_IDS_KEY, in this
622+
// way in SCM start it uses internal service id to find its service id.
623623
conf.set(ScmConfigKeys.OZONE_SCM_SERVICE_IDS_KEY, scmServiceId);
624624
conf.set(ScmConfigKeys.OZONE_SCM_DEFAULT_SERVICE_ID, scmServiceId);
625625
String scmNodesKey = ConfUtils.addKeySuffixes(
@@ -629,6 +629,10 @@ private void initSCMHAConfig() {
629629

630630
for (int i = 1; i <= numOfSCMs; i++) {
631631
String scmNodeId = SCM_NODE_ID_PREFIX + i;
632+
633+
if (i == 1) {
634+
conf.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, scmNodeId);
635+
}
632636
scmNodesKeyValue.append(",").append(scmNodeId);
633637

634638
String scmAddrKey = ConfUtils.addKeySuffixes(

0 commit comments

Comments
 (0)