diff --git a/dev-support/ci/find_test_class_project.bats b/dev-support/ci/find_test_class_project.bats new file mode 100644 index 00000000000..40be1fbfbb9 --- /dev/null +++ b/dev-support/ci/find_test_class_project.bats @@ -0,0 +1,239 @@ +#!/usr/bin/env bats +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load find_test_class_project.sh + +setup() { + # Create a temporary directory for test files + TEST_DIR=$(mktemp -d) + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test" + mkdir -p "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test" + touch "${TEST_DIR}/project1/pom.xml" + touch "${TEST_DIR}/project2/pom.xml" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/TestClass1.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/TestClass2.java" + + ORIG_DIR=$(pwd) + cd "${TEST_DIR}" +} + +teardown() { + cd "${ORIG_DIR}" + rm -rf "${TEST_DIR}" +} + +# Test the find_project_paths_for_test_class function + +@test "find project for simple class name" { + result=$(find_project_paths_for_test_class "TestClass1" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "find project for class with package" { + result=$(find_project_paths_for_test_class "org.apache.ozone.test.TestClass2" 2>/dev/null) + + [ "$result" = "./project2" ] +} + +@test "find project for wildcard class" { + result=$(find_project_paths_for_test_class "TestClass*" 2>/dev/null) + expected=$(echo -e "./project1\n./project2") + + [ "$result" = "$expected" ] +} + +@test "no project for non-existent class" { + result=$(find_project_paths_for_test_class "NonExistentClass" 2>/dev/null) + + [ -z "$result" ] +} + +@test "skip abstract classes" { + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/AbstractTestClass.java" + + result=$(find_project_paths_for_test_class "AbstractTestClass" 2>/dev/null) + + [ -z "$result" ] +} + +@test "empty class name returns nothing" { + result=$(find_project_paths_for_test_class "" 2>/dev/null) + + [ -z "$result" ] +} + +@test "multiple projects with same test class name" { + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/CommonTest.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/CommonTest.java" + + result=$(find_project_paths_for_test_class "CommonTest" 2>/dev/null) + + expected=$(echo -e "./project1\n./project2") + [ "$result" = "$expected" ] +} + +@test "project without pom.xml is ignored" { + mkdir -p "${TEST_DIR}/project3/src/test/java/org/apache/ozone/test" + touch "${TEST_DIR}/project3/src/test/java/org/apache/ozone/test/TestClass3.java" + + result=$(find_project_paths_for_test_class "TestClass3" 2>/dev/null) + + [ -z "$result" ] +} + +@test "partial package name search" { + result=$(find_project_paths_for_test_class 
"ozone.test.TestClass2" 2>/dev/null) + + [ "$result" = "./project2" ] +} + +@test "test class in non-standard test directory" { + mkdir -p "${TEST_DIR}/project1/src/test/scala/org/apache/ozone/test" + touch "${TEST_DIR}/project1/src/test/scala/org/apache/ozone/test/ScalaTest.java" + + result=$(find_project_paths_for_test_class "ScalaTest" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "case sensitivity in class name" { + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/MixedCaseTest.java" + + result=$(find_project_paths_for_test_class "mixedcasetest" 2>/dev/null) + + [ -z "$result" ] +} + +@test "nested project structure" { + mkdir -p "${TEST_DIR}/parent/child/src/test/java/org/apache/ozone/test" + touch "${TEST_DIR}/parent/child/pom.xml" + touch "${TEST_DIR}/parent/child/src/test/java/org/apache/ozone/test/NestedTest.java" + + result=$(find_project_paths_for_test_class "NestedTest" 2>/dev/null) + + [ "$result" = "./parent/child" ] +} + +@test "test class with numeric suffix" { + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/Test1.java" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/Test2.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/Test3.java" + + result=$(find_project_paths_for_test_class "Test[1-2]" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "multiple test classes matching pattern" { + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/TestA.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/TestB.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/TestC.java" + + result=$(find_project_paths_for_test_class "Test[A-C]" 2>/dev/null) + + expected=$(echo -e "./project1\n./project2") + [ "$result" = "$expected" ] +} + +@test "test class in multiple package levels" { + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/deep/nested/pkg" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/deep/nested/pkg/DeepTest.java" + + result=$(find_project_paths_for_test_class "org.apache.ozone.test.deep.nested.pkg.DeepTest" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "test class with same name in different packages" { + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/pkg1" + mkdir -p "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/pkg2" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/pkg1/SameNameTest.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/pkg2/SameNameTest.java" + + result=$(find_project_paths_for_test_class "SameNameTest" 2>/dev/null) + + expected=$(echo -e "./project1\n./project2") + [ "$result" = "$expected" ] +} + +@test "test class with package wildcard" { + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/pkg1" + mkdir -p "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/pkg2" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/pkg1/WildcardTest.java" + touch "${TEST_DIR}/project2/src/test/java/org/apache/ozone/test/pkg2/WildcardTest.java" + + result=$(find_project_paths_for_test_class "org.apache.ozone.test.pkg*.WildcardTest" 2>/dev/null) + + expected=$(echo -e "./project1\n./project2") + [ "$result" = "$expected" ] +} + +@test "test class with exact package match" { + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/exact" + mkdir -p "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/exactmatch" + touch 
"${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/exact/ExactTest.java" + touch "${TEST_DIR}/project1/src/test/java/org/apache/ozone/test/exactmatch/ExactTest.java" + result=$(find_project_paths_for_test_class "org.apache.ozone.test.exact.ExactTest" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "test class with trailing whitespace" { + result=$(find_project_paths_for_test_class "TestClass1 " 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "test class project with trailing whitespace" { + result=$(find_project_paths_for_test_class "apache.ozone.test.TestClass1 " 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "test class with leading whitespace" { + result=$(find_project_paths_for_test_class " TestClass1" 2>/dev/null) + + [ "$result" = "./project1" ] +} + +@test "test class with partial package and wildcard" { + result=$(find_project_paths_for_test_class "apache.*.TestClass*" 2>/dev/null) + + expected=$(echo -e "./project1\n./project2") + [ "$result" = "$expected" ] +} + +# Test the build_maven_project_list function + +@test "build maven project list with empty project paths" { + result=$(build_maven_project_list "") + + [ "$result" = "" ] +} + +@test "build maven project list with one project path" { + result=$(build_maven_project_list "./project1") + + [ "$result" = "-pl ./project1" ] +} + +@test "build maven project list with multiple project paths" { + local project_paths=$(echo -e "./project1\n./project2") + result=$(build_maven_project_list "$project_paths") + + [ "$result" = "-pl ./project1,./project2" ] +} diff --git a/dev-support/ci/find_test_class_project.sh b/dev-support/ci/find_test_class_project.sh new file mode 100755 index 00000000000..6f6e755984d --- /dev/null +++ b/dev-support/ci/find_test_class_project.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Function to find project paths for a given test class +find_project_paths_for_test_class() { + local test_class="$1" + + if [[ -z "${test_class}" ]] || [[ "${test_class}" == "Abstract"* ]]; then + return + fi + + echo "Finding project for test class: ${test_class}" >&2 + + # Trim test_class of whitespace + local base_class_name=$(echo "${test_class}" | xargs) + + # If the base name is empty after removing wildcards, use a reasonable default + if [[ -z "${base_class_name}" ]]; then + echo "Test class name contains only wildcards, searching in all test directories" >&2 + return + fi + + echo "Searching for files matching base name: ${base_class_name}" >&2 + + # Find all projects containing matching test classes - use a more flexible search approach + # First try direct filename search + local test_files=($(find . 
-path "*/src/test/*" -name "${base_class_name}.java" | sort -u)) + + # If no files found and the class name contains dots (package notation), try searching by path + if [[ ${#test_files[@]} -eq 0 && "${base_class_name}" == *"."* ]]; then + # Convert base class to path format + local test_class_path="${base_class_name//./\/}.java" + echo "No files found with direct name search, trying path-based search" >&2 + echo "TEST_CLASS_PATH pattern: ${test_class_path}" >&2 + + # Search by path pattern + test_files=($(find . -path "*/src/test/*/${test_class_path%.*}*.java" | sort -u)) + fi + + echo "Found ${#test_files[@]} matching test file(s)" >&2 + + if [[ ${#test_files[@]} -gt 0 ]]; then + # Extract project paths (up to the src/test directory) + local project_paths=() + for test_file in "${test_files[@]}"; do + echo "TEST_FILE: ${test_file}" >&2 + local project_path=$(dirname "${test_file}" | sed -e 's|/src/test.*||') + if [[ -f "${project_path}/pom.xml" ]]; then + echo "Found test in project: ${project_path}" >&2 + project_paths+=("${project_path}") + fi + done + + printf '%s\n' "${project_paths[@]}" | sort -u + else + echo "Could not find project for test class pattern: ${test_class}" >&2 + fi +} + +# Takes a project list which is the output of `find_project_paths_for_test_class` +# and returns a string that can use for maven -pl option, eg. "./project1\n./project2" -> "-pl ./project1,./project2" +build_maven_project_list() { + local project_paths="$1" + if [[ -z "${project_paths}" ]]; then + echo "" + return + fi + + local comma_separated=$(echo "${project_paths}" | tr '\n' ',') + comma_separated="${comma_separated%,}" + echo "-pl ${comma_separated}" +} + +# If option get-pl set, write the maven -pl option value to stdout +# otherwise, write the project paths to stdout +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + if [[ "$1" == "--get-pl" ]]; then + shift + project_paths=$(find_project_paths_for_test_class "$@") + build_maven_project_list "${project_paths}" + else + find_project_paths_for_test_class "$@" + fi +fi diff --git a/dev-support/ci/selective_ci_checks.bats b/dev-support/ci/selective_ci_checks.bats index 99085c942e0..e1d5ee7578b 100644 --- a/dev-support/ci/selective_ci_checks.bats +++ b/dev-support/ci/selective_ci_checks.bats @@ -99,6 +99,17 @@ load bats-assert/load.bash assert_output -p needs-kubernetes-tests=true } +@test "java test + pmd change" { + run dev-support/ci/selective_ci_checks.sh 250bd5f317 + + assert_output -p 'basic-checks=["rat","author","checkstyle","findbugs","pmd"]' + assert_output -p needs-build=true + assert_output -p needs-compile=true + assert_output -p needs-compose-tests=false + assert_output -p needs-integration-tests=true + assert_output -p needs-kubernetes-tests=false +} + @test "integration and unit: java change" { run dev-support/ci/selective_ci_checks.sh 9aebf6e25 diff --git a/dev-support/ci/selective_ci_checks.sh b/dev-support/ci/selective_ci_checks.sh index 213d071b911..24577653d6b 100755 --- a/dev-support/ci/selective_ci_checks.sh +++ b/dev-support/ci/selective_ci_checks.sh @@ -199,6 +199,7 @@ function run_all_tests_if_environment_files_changed() { ) local ignore_array=( "^dev-support/ci/pr_title_check" + "^dev-support/ci/find_test_class_project" ) filter_changed_files @@ -478,6 +479,7 @@ function get_count_misc_files() { start_end::group_start "Count misc. 
files" local pattern_array=( "^dev-support/ci/pr_title_check" + "^dev-support/ci/find_test_class_project" "^.github" "^hadoop-hdds/dev-support/checkstyle" "^hadoop-ozone/dev-support/checks" @@ -487,6 +489,7 @@ function get_count_misc_files() { "\.txt$" "\.md$" "findbugsExcludeFile.xml" + "pmd-ruleset.xml" "/NOTICE$" "^hadoop-ozone/dist/src/main/compose/common/grafana/dashboards" ) diff --git a/dev-support/pmd/pmd-ruleset.xml b/dev-support/pmd/pmd-ruleset.xml index 04b63b1b12f..5e7f2db92aa 100644 --- a/dev-support/pmd/pmd-ruleset.xml +++ b/dev-support/pmd/pmd-ruleset.xml @@ -33,7 +33,12 @@ + + + + + .*/generated-sources/.* diff --git a/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml b/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml index 61f4cddad8c..900658f024a 100644 --- a/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml @@ -18,10 +18,6 @@ - - - - diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java index af8413cc43a..152a59d41e2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.ratis.util.ExitUtils; import picocli.CommandLine; import picocli.CommandLine.ExitCode; import picocli.CommandLine.Option; @@ -78,7 +79,7 @@ public void run(String[] argv) { int exitCode = execute(argv); if (exitCode != ExitCode.OK) { - System.exit(exitCode); + ExitUtils.terminate(exitCode, null, null); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 941df45c2df..c2f79a78657 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -632,7 +632,9 @@ public final class ScmConfigKeys { public static final String NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY = "net.topology.node.switch.mapping.impl"; - + public static final String HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL + = "hdds.container.ratis.statemachine.write.wait.interval"; + public static final long HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT = 10 * 60 * 1000_000_000L; /** * Never constructed. 
*/ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java index c7985cf47df..23c8f3bb8a2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java @@ -18,13 +18,15 @@ package org.apache.hadoop.hdds.scm.container; import com.google.common.base.Preconditions; -import org.apache.commons.lang3.builder.CompareToBuilder; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; +import jakarta.annotation.Nonnull; +import java.util.Objects; +import java.util.function.Supplier; +import net.jcip.annotations.Immutable; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.DelegatedCodec; import org.apache.hadoop.hdds.utils.db.LongCodec; +import org.apache.ratis.util.MemoizedSupplier; /** * Container ID is an integer that is a value between 1..MAX_CONTAINER ID. @@ -34,6 +36,7 @@ *

* This class is immutable. */ +@Immutable public final class ContainerID implements Comparable { private static final Codec CODEC = new DelegatedCodec<>( LongCodec.get(), ContainerID::valueOf, c -> c.id, @@ -46,16 +49,20 @@ public static Codec getCodec() { } private final long id; + private final Supplier proto; + private final Supplier hash; /** * Constructs ContainerID. * * @param id int */ - public ContainerID(long id) { + private ContainerID(long id) { Preconditions.checkState(id >= 0, "Container ID should be positive. %s.", id); this.id = id; + this.proto = MemoizedSupplier.valueOf(() -> HddsProtos.ContainerID.newBuilder().setId(id).build()); + this.hash = MemoizedSupplier.valueOf(() -> 61 * 71 + Long.hashCode(id)); } /** @@ -80,16 +87,12 @@ public long getId() { return id; } - /** - * Use proto message. - */ - @Deprecated - public byte[] getBytes() { + public static byte[] getBytes(long id) { return LongCodec.get().toPersistedFormat(id); } public HddsProtos.ContainerID getProtobuf() { - return HddsProtos.ContainerID.newBuilder().setId(id).build(); + return proto.get(); } public static ContainerID getFromProtobuf(HddsProtos.ContainerID proto) { @@ -107,25 +110,18 @@ public boolean equals(final Object o) { } final ContainerID that = (ContainerID) o; - - return new EqualsBuilder() - .append(id, that.id) - .isEquals(); + return this.id == that.id; } @Override public int hashCode() { - return new HashCodeBuilder(61, 71) - .append(id) - .toHashCode(); + return hash.get(); } @Override - public int compareTo(final ContainerID that) { - Preconditions.checkNotNull(that); - return new CompareToBuilder() - .append(this.id, that.id) - .build(); + public int compareTo(@Nonnull final ContainerID that) { + Objects.requireNonNull(that, "that == null"); + return Long.compare(this.id, that.id); } @Override diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NodeSchemaManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NodeSchemaManager.java index 51c1670d195..326b16a7b6f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NodeSchemaManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NodeSchemaManager.java @@ -124,7 +124,7 @@ public String complete(String path) { int i, j; for (i = 1, j = 1; i < subPath.length && j < (allSchema.size() - 1);) { if (allSchema.get(j).matchPrefix(subPath[i])) { - newPath.append(NetConstants.PATH_SEPARATOR_STR + subPath[i]); + newPath.append(NetConstants.PATH_SEPARATOR_STR).append(subPath[i]); i++; j++; } else { @@ -133,7 +133,7 @@ public String complete(String path) { } } if (i == (subPath.length - 1)) { - newPath.append(NetConstants.PATH_SEPARATOR_STR + subPath[i]); + newPath.append(NetConstants.PATH_SEPARATOR_STR).append(subPath[i]); return newPath.toString(); } return null; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index f10a7296fb8..997b5c576b7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -522,19 +522,20 @@ public int hashCode() { @Override public String toString() { final StringBuilder b = - new StringBuilder(getClass().getSimpleName()).append("["); + new StringBuilder(getClass().getSimpleName()).append("{"); b.append(" Id: ").append(id.getId()); - b.append(", Nodes: "); + b.append(", 
Nodes: ["); for (DatanodeDetails datanodeDetails : nodeStatus.keySet()) { - b.append(datanodeDetails); - b.append(" ReplicaIndex: ").append(this.getReplicaIndex(datanodeDetails)); + b.append(" {").append(datanodeDetails); + b.append(", ReplicaIndex: ").append(this.getReplicaIndex(datanodeDetails)).append("},"); } + b.append("]"); b.append(", ReplicationConfig: ").append(replicationConfig); b.append(", State:").append(getPipelineState()); b.append(", leaderId:").append(leaderId != null ? leaderId.toString() : ""); b.append(", CreationTimestamp").append(getCreationTimestamp() .atZone(ZoneId.systemDefault())); - b.append("]"); + b.append("}"); return b.toString(); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalization.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalization.java index 131469e4341..0afcc2c7b23 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalization.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalization.java @@ -17,6 +17,9 @@ package org.apache.hadoop.ozone.upgrade; +import static org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.INVALID_REQUEST; + +import java.io.IOException; import java.util.Collection; import java.util.Collections; import org.apache.hadoop.hdds.annotation.InterfaceAudience; @@ -110,6 +113,60 @@ public Collection msgs() { } } + public static void handleInvalidRequestAfterInitiatingFinalization( + boolean force, UpgradeException e) throws IOException { + if (INVALID_REQUEST.equals(e.getResult())) { + if (force) { + return; + } + System.err.println("Finalization is already in progress, it is not" + + "possible to initiate it again."); + e.printStackTrace(System.err); + System.err.println("If you want to track progress from a new client" + + "for any reason, use --takeover, and the status update will be" + + "received by the new client. Note that with forcing to monitor" + + "progress from a new client, the old one initiated the upgrade" + + "will not be able to monitor the progress further and exit."); + throw new IOException("Exiting..."); + } else { + throw e; + } + } + + public static void emitExitMsg() { + System.out.println("Exiting..."); + } + + public static boolean isFinalized(Status status) { + return Status.ALREADY_FINALIZED.equals(status); + } + + public static boolean isDone(Status status) { + return Status.FINALIZATION_DONE.equals(status); + } + + public static boolean isInprogress(Status status) { + return Status.FINALIZATION_IN_PROGRESS.equals(status); + } + + public static boolean isStarting(Status status) { + return Status.STARTING_FINALIZATION.equals(status); + } + + public static void emitGeneralErrorMsg() { + System.err.println("Finalization was not successful."); + } + + public static void emitFinishedMsg(String component) { + System.out.println("Finalization of " + component + "'s metadata upgrade " + + "finished."); + } + + public static void emitCancellationMsg(String component) { + System.err.println("Finalization command was cancelled. Note that, this" + + "will not cancel finalization in " + component + ". 
Progress can be" + + "monitored in the Ozone Manager's log."); + } private UpgradeFinalization() { // no instances } diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index f20d606d436..6bf3ca9255e 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -650,18 +650,6 @@ allow group public LIST access. - - ozone.om.user.max.volume - 1024 - OM, MANAGEMENT - - The maximum number of volumes a user can have on a cluster.Increasing or - decreasing this number has no real impact on ozone cluster. This is - defined only for operational purposes. Only an administrator can create a - volume, once a volume is created there are no restrictions on the number - of buckets or keys inside each bucket a user can create. - - ozone.om.db.dirs @@ -3548,6 +3536,14 @@ Timeout for the request submitted directly to Ratis in datanode. + + hdds.container.ratis.statemachine.write.wait.interval + OZONE, DATANODE + 10m + + Timeout for the write path for container blocks. + + hdds.datanode.slow.op.warning.threshold OZONE, DATANODE, PERFORMANCE diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerInfo.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerInfo.java index 3f2f7f2c09a..f38eceb52ad 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerInfo.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerInfo.java @@ -28,6 +28,8 @@ import java.time.Duration; import java.time.Instant; +import java.util.concurrent.ThreadLocalRandom; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -40,6 +42,25 @@ */ public class TestContainerInfo { + static int oldHash(long id) { + return new HashCodeBuilder(61, 71) + .append(id) + .toHashCode(); + } + + static void assertHash(long value) { + final ContainerID id = ContainerID.valueOf(value); + assertEquals(oldHash(value), id.hashCode(), id::toString); + } + + @Test + void testContainIdHash() { + for (int i = 0; i < 100; i++) { + assertHash(i); + final long id = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); + assertHash(id); + } + } @Test void getProtobufRatis() { diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java index a4ce5b24943..ee0f920d8af 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java @@ -80,13 +80,13 @@ void testJsonOutput() throws IOException { report.incrementAndSample( ReplicationManagerReport.HealthState.UNDER_REPLICATED, - new ContainerID(1)); + ContainerID.valueOf(1)); report.incrementAndSample( ReplicationManagerReport.HealthState.UNDER_REPLICATED, - new ContainerID(2)); + ContainerID.valueOf(2)); report.incrementAndSample( ReplicationManagerReport.HealthState.OVER_REPLICATED, - new ContainerID(3)); + ContainerID.valueOf(3)); report.setComplete(); String jsonString = JsonUtils.toJsonStringWithDefaultPrettyPrinter(report); @@ 
-124,13 +124,13 @@ void testJsonOutput() throws IOException { void testContainerIDsCanBeSampled() { report.incrementAndSample( ReplicationManagerReport.HealthState.UNDER_REPLICATED, - new ContainerID(1)); + ContainerID.valueOf(1)); report.incrementAndSample( ReplicationManagerReport.HealthState.UNDER_REPLICATED, - new ContainerID(2)); + ContainerID.valueOf(2)); report.incrementAndSample( ReplicationManagerReport.HealthState.OVER_REPLICATED, - new ContainerID(3)); + ContainerID.valueOf(3)); assertEquals(2, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); @@ -141,13 +141,13 @@ void testContainerIDsCanBeSampled() { List sample = report.getSample(ReplicationManagerReport.HealthState.UNDER_REPLICATED); - assertEquals(new ContainerID(1), sample.get(0)); - assertEquals(new ContainerID(2), sample.get(1)); + assertEquals(ContainerID.valueOf(1), sample.get(0)); + assertEquals(ContainerID.valueOf(2), sample.get(1)); assertEquals(2, sample.size()); sample = report.getSample(ReplicationManagerReport.HealthState.OVER_REPLICATED); - assertEquals(new ContainerID(3), sample.get(0)); + assertEquals(ContainerID.valueOf(3), sample.get(0)); assertEquals(1, sample.size()); sample = @@ -160,13 +160,13 @@ void testSamplesAreLimited() { for (int i = 0; i < ReplicationManagerReport.SAMPLE_LIMIT * 2; i++) { report.incrementAndSample( ReplicationManagerReport.HealthState.UNDER_REPLICATED, - new ContainerID(i)); + ContainerID.valueOf(i)); } List sample = report.getSample(ReplicationManagerReport.HealthState.UNDER_REPLICATED); assertEquals(ReplicationManagerReport.SAMPLE_LIMIT, sample.size()); for (int i = 0; i < ReplicationManagerReport.SAMPLE_LIMIT; i++) { - assertEquals(new ContainerID(i), sample.get(i)); + assertEquals(ContainerID.valueOf(i), sample.get(i)); } } diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java index 74347acefa4..a213482a5c8 100644 --- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java +++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java @@ -98,7 +98,7 @@ default String getTrimmed(String key, String defaultValue) { default String[] getTrimmedStrings(String name) { String valueString = get(name); - if (null == valueString || valueString.trim().isEmpty()) { + if (null == valueString) { return EMPTY_STRING_ARRAY; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java index 7ff80c7a732..03f0fec1835 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java @@ -205,34 +205,23 @@ public long getProcessedTransactionFailCount() { @Override public String toString() { - StringBuffer buffer = new StringBuffer(); - buffer.append("successCount = " + successCount.value()).append("\t") - .append("successBytes = " + successBytes.value()).append("\t") - .append("failureCount = " + failureCount.value()).append("\t") - .append("outOfOrderDeleteBlockTransactionCount = " - + outOfOrderDeleteBlockTransactionCount.value()).append("\t") - .append("totalPendingBlockCount = 
" - + totalPendingBlockCount.value()).append("\t") - .append("totalBlockChosenCount = " - + totalBlockChosenCount.value()).append("\t") - .append("totalContainerChosenCount = " - + totalContainerChosenCount.value()).append("\t") - .append("receivedTransactionCount = " - + receivedTransactionCount.value()).append("\t") - .append("receivedRetryTransactionCount = " - + receivedRetryTransactionCount.value()).append("\t") - .append("processedTransactionSuccessCount = " - + processedTransactionSuccessCount.value()).append("\t") - .append("processedTransactionFailCount = " - + processedTransactionFailCount.value()).append("\t") - .append("receivedContainerCount = " - + receivedContainerCount.value()).append("\t") - .append("receivedBlockCount = " - + receivedBlockCount.value()).append("\t") - .append("markedBlockCount = " - + markedBlockCount.value()).append("\t") - .append("totalLockTimeoutTransactionCount = " - + totalLockTimeoutTransactionCount.value()).append("\t"); + StringBuilder buffer = new StringBuilder() + .append("successCount = ").append(successCount.value()).append("\t") + .append("successBytes = ").append(successBytes.value()).append("\t") + .append("failureCount = ").append(failureCount.value()).append("\t") + .append("outOfOrderDeleteBlockTransactionCount = ") + .append(outOfOrderDeleteBlockTransactionCount.value()).append("\t") + .append("totalPendingBlockCount = ").append(totalPendingBlockCount.value()).append("\t") + .append("totalBlockChosenCount = ").append(totalBlockChosenCount.value()).append("\t") + .append("totalContainerChosenCount = ").append(totalContainerChosenCount.value()).append("\t") + .append("receivedTransactionCount = ").append(receivedTransactionCount.value()).append("\t") + .append("receivedRetryTransactionCount = ").append(receivedRetryTransactionCount.value()).append("\t") + .append("processedTransactionSuccessCount = ").append(processedTransactionSuccessCount.value()).append("\t") + .append("processedTransactionFailCount = ").append(processedTransactionFailCount.value()).append("\t") + .append("receivedContainerCount = ").append(receivedContainerCount.value()).append("\t") + .append("receivedBlockCount = ").append(receivedBlockCount.value()).append("\t") + .append("markedBlockCount = ").append(markedBlockCount.value()).append("\t") + .append("totalLockTimeoutTransactionCount = ").append(totalLockTimeoutTransactionCount.value()).append("\t"); return buffer.toString(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index bc995854a8f..91cfaa5a21a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -102,7 +102,7 @@ public class StateContext { static final Logger LOG = LoggerFactory.getLogger(StateContext.class); - private final Queue commandQueue; + private final Queue> commandQueue; private final Map cmdStatusMap; private final Lock lock; private final DatanodeStateMachine parentDatanodeStateMachine; @@ -738,7 +738,7 @@ public OptionalLong getTermOfLeaderSCM() { * * @return SCMCommand or Null. 
*/ - public SCMCommand getNextCommand() { + public SCMCommand getNextCommand() { lock.lock(); try { initTermOfLeaderSCM(); @@ -772,7 +772,7 @@ public SCMCommand getNextCommand() { * * @param command - SCMCommand. */ - public void addCommand(SCMCommand command) { + public void addCommand(SCMCommand command) { lock.lock(); try { if (commandQueue.size() >= maxCommandQueueLimit) { @@ -792,7 +792,7 @@ public Map getCommandQueueSummary() { Map summary = new HashMap<>(); lock.lock(); try { - for (SCMCommand cmd : commandQueue) { + for (SCMCommand cmd : commandQueue) { summary.put(cmd.getType(), summary.getOrDefault(cmd.getType(), 0) + 1); } } finally { @@ -832,7 +832,7 @@ public void addCmdStatus(Long key, CommandStatus status) { * * @param cmd - {@link SCMCommand}. */ - public void addCmdStatus(SCMCommand cmd) { + public void addCmdStatus(SCMCommand cmd) { if (cmd.getType() == SCMCommandProto.Type.deleteBlocksCommand) { addCmdStatus(cmd.getId(), DeleteBlockCommandStatusBuilder.newBuilder() diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java index 700303ee0c9..1a1594cf8a9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java @@ -84,7 +84,7 @@ public CloseContainerCommandHandler( * @param connectionManager - The SCMs that we are talking to. */ @Override - public void handle(SCMCommand command, OzoneContainer ozoneContainer, + public void handle(SCMCommand command, OzoneContainer ozoneContainer, StateContext context, SCMConnectionManager connectionManager) { queuedCount.incrementAndGet(); CompletableFuture.runAsync(() -> { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java index 8fcd192fe53..5cbe4726897 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java @@ -93,7 +93,7 @@ public ClosePipelineCommandHandler( * @param connectionManager - The SCMs that we are talking to. 
*/ @Override - public void handle(SCMCommand command, OzoneContainer ozoneContainer, + public void handle(SCMCommand command, OzoneContainer ozoneContainer, StateContext context, SCMConnectionManager connectionManager) { queuedCount.incrementAndGet(); CompletableFuture.runAsync(() -> { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java index 69a40e1f1ad..696b04defe3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java @@ -85,7 +85,7 @@ public CommandHandler getDeleteBlocksCommandHandler() { * * @param command - SCM Command. */ - public void handle(SCMCommand command) { + public void handle(SCMCommand command) { Preconditions.checkNotNull(command); CommandHandler handler = handlerMap.get(command.getType()); if (handler != null) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java index 68ab8087d6b..d516977838e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java @@ -38,7 +38,7 @@ public interface CommandHandler { * @param context - Current Context. * @param connectionManager - The SCMs that we are talking to. */ - void handle(SCMCommand command, OzoneContainer container, + void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager); /** @@ -68,7 +68,7 @@ void handle(SCMCommand command, OzoneContainer container, /** * Default implementation for updating command status. */ - default void updateCommandStatus(StateContext context, SCMCommand command, + default void updateCommandStatus(StateContext context, SCMCommand command, Consumer cmdStatusUpdater, Logger log) { if (!context.updateCommandStatus(command.getId(), cmdStatusUpdater)) { log.warn("{} with Id:{} not found.", command.getType(), diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java index d86c0287516..30ffe7ed415 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java @@ -90,7 +90,7 @@ public CreatePipelineCommandHandler(ConfigurationSource conf, * @param connectionManager - The SCMs that we are talking to. 
*/ @Override - public void handle(SCMCommand command, OzoneContainer ozoneContainer, + public void handle(SCMCommand command, OzoneContainer ozoneContainer, StateContext context, SCMConnectionManager connectionManager) { queuedCount.incrementAndGet(); CompletableFuture.runAsync(() -> { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java index 80c078c5087..71277c06377 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java @@ -123,7 +123,7 @@ public DeleteBlocksCommandHandler(OzoneContainer container, } @Override - public void handle(SCMCommand command, OzoneContainer container, + public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { if (command.getType() != SCMCommandProto.Type.deleteBlocksCommand) { LOG.warn("Skipping handling command, expected command " diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java index 1d23da794a1..ae036a1c8f8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java @@ -76,7 +76,7 @@ protected DeleteContainerCommandHandler(Clock clock, this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.deleteContainerCommand + "Ms"); } @Override - public void handle(final SCMCommand command, + public void handle(final SCMCommand command, final OzoneContainer ozoneContainer, final StateContext context, final SCMConnectionManager connectionManager) { @@ -93,7 +93,7 @@ public void handle(final SCMCommand command, } } - private void handleInternal(SCMCommand command, StateContext context, + private void handleInternal(SCMCommand command, StateContext context, DeleteContainerCommand deleteContainerCommand, ContainerController controller) { final long startTime = Time.monotonicNow(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java index 6e1c566343d..a27b94b76a3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java @@ -63,7 +63,7 @@ public FinalizeNewLayoutVersionCommandHandler() { * @param connectionManager - The SCMs that we are talking to. 
*/ @Override - public void handle(SCMCommand command, OzoneContainer ozoneContainer, + public void handle(SCMCommand command, OzoneContainer ozoneContainer, StateContext context, SCMConnectionManager connectionManager) { LOG.info("Processing FinalizeNewLayoutVersionCommandHandler command."); invocationCount.incrementAndGet(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java index 4366a912188..b2159aa44f7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java @@ -47,7 +47,7 @@ public ReconstructECContainersCommandHandler(ConfigurationSource conf, } @Override - public void handle(SCMCommand command, OzoneContainer container, + public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { ReconstructECContainersCommand ecContainersCommand = (ReconstructECContainersCommand) command; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java index bc8b69a50ae..f26329792b0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java @@ -48,7 +48,7 @@ public RefreshVolumeUsageCommandHandler() { } @Override - public void handle(SCMCommand command, OzoneContainer container, + public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { LOG.info("receive command to refresh usage info of all volumes"); invocationCount.incrementAndGet(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java index d52c51e298e..17bb10fc7ea 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java @@ -65,7 +65,7 @@ public String getMetricsName() { } @Override - public void handle(SCMCommand command, OzoneContainer container, + public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { final ReplicateContainerCommand replicateCommand = diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java index 548a5491743..25a158bb45d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java @@ -76,7 +76,7 @@ public SetNodeOperationalStateCommandHandler(ConfigurationSource conf, * @param connectionManager - The SCMs that we are talking to. */ @Override - public void handle(SCMCommand command, OzoneContainer container, + public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { long startTime = Time.monotonicNow(); invocationCount.incrementAndGet(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 0a1df1088d6..2a9fe61d17f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -35,17 +35,22 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.SortedMap; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import org.apache.hadoop.hdds.HddsUtils; @@ -187,13 +192,38 @@ long getStartTime() { } } + static class WriteFutures { + private final Future writeChunkFuture; + private final CompletableFuture raftFuture; + private final long startTime; + + WriteFutures(Future writeChunkFuture, + CompletableFuture raftFuture, long startTime) { + this.writeChunkFuture = writeChunkFuture; + this.raftFuture = raftFuture; + this.startTime = startTime; + } + + public Future getWriteChunkFuture() { + return writeChunkFuture; + } + + public CompletableFuture getRaftFuture() { + return raftFuture; + } + + long getStartTime() { + return startTime; + } + } + private final SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); private final ContainerDispatcher dispatcher; private final ContainerController containerController; private final XceiverServerRatis ratisServer; - private final ConcurrentHashMap> writeChunkFutureMap; + private final NavigableMap writeChunkFutureMap; + private final long writeChunkWaitMaxNs; // keeps track of 
the containers created per pipeline private final Map container2BCSIDMap; @@ -229,7 +259,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI this.containerController = containerController; this.ratisServer = ratisServer; metrics = CSMMetrics.create(gid); - this.writeChunkFutureMap = new ConcurrentHashMap<>(); + this.writeChunkFutureMap = new ConcurrentSkipListMap<>(); applyTransactionCompletionMap = new ConcurrentHashMap<>(); this.unhealthyContainers = ConcurrentHashMap.newKeySet(); long pendingRequestsBytesLimit = (long)conf.getStorageSize( @@ -273,6 +303,8 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI this.waitOnBothFollowers = conf.getObject( DatanodeConfiguration.class).waitOnAllFollowers(); + this.writeChunkWaitMaxNs = conf.getTimeDuration(ScmConfigKeys.HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL, + ScmConfigKeys.HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT, TimeUnit.NANOSECONDS); } private void validatePeers() throws IOException { @@ -542,6 +574,16 @@ private ContainerCommandResponseProto dispatchCommand( private CompletableFuture writeStateMachineData( ContainerCommandRequestProto requestProto, long entryIndex, long term, long startTime) { + final WriteFutures previous = writeChunkFutureMap.get(entryIndex); + if (previous != null) { + // generally state machine will wait forever, for precaution, a check is added if retry happens. + return previous.getRaftFuture(); + } + try { + validateLongRunningWrite(); + } catch (StorageContainerException e) { + return completeExceptionally(e); + } final WriteChunkRequestProto write = requestProto.getWriteChunk(); RaftServer server = ratisServer.getServer(); Preconditions.checkArgument(!write.getData().isEmpty()); @@ -564,19 +606,22 @@ private CompletableFuture writeStateMachineData( .setContainer2BCSIDMap(container2BCSIDMap) .build(); CompletableFuture raftFuture = new CompletableFuture<>(); - // ensure the write chunk happens asynchronously in writeChunkExecutor pool - // thread. - CompletableFuture writeChunkFuture = - CompletableFuture.supplyAsync(() -> { + // ensure the write chunk happens asynchronously in writeChunkExecutor pool thread. 
+ Future future = getChunkExecutor( + requestProto.getWriteChunk()).submit(() -> { try { try { checkContainerHealthy(write.getBlockID().getContainerID(), true); } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, requestProto); + ContainerCommandResponseProto result = ContainerUtils.logAndReturnError(LOG, e, requestProto); + handleCommandResult(requestProto, entryIndex, startTime, result, write, raftFuture); + return result; } metrics.recordWriteStateMachineQueueingLatencyNs( Time.monotonicNowNanos() - startTime); - return dispatchCommand(requestProto, context); + ContainerCommandResponseProto result = dispatchCommand(requestProto, context); + handleCommandResult(requestProto, entryIndex, startTime, result, write, raftFuture); + return result; } catch (Exception e) { LOG.error("{}: writeChunk writeStateMachineData failed: blockId" + "{} logIndex {} chunkName {}", getGroupId(), write.getBlockID(), @@ -588,55 +633,87 @@ private CompletableFuture writeStateMachineData( stateMachineHealthy.set(false); raftFuture.completeExceptionally(e); throw e; + } finally { + // Remove the future once it finishes execution from the + writeChunkFutureMap.remove(entryIndex); } - }, getChunkExecutor(requestProto.getWriteChunk())); + }); - writeChunkFutureMap.put(entryIndex, writeChunkFuture); + writeChunkFutureMap.put(entryIndex, new WriteFutures(future, raftFuture, startTime)); if (LOG.isDebugEnabled()) { LOG.debug("{}: writeChunk writeStateMachineData : blockId" + "{} logIndex {} chunkName {}", getGroupId(), write.getBlockID(), entryIndex, write.getChunkData().getChunkName()); } - // Remove the future once it finishes execution from the - // writeChunkFutureMap. - writeChunkFuture.thenApply(r -> { - if (r.getResult() != ContainerProtos.Result.SUCCESS - && r.getResult() != ContainerProtos.Result.CONTAINER_NOT_OPEN - && r.getResult() != ContainerProtos.Result.CLOSED_CONTAINER_IO - // After concurrent flushes are allowed on the same key, chunk file inconsistencies can happen and - // that should not crash the pipeline. - && r.getResult() != ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY) { - StorageContainerException sce = - new StorageContainerException(r.getMessage(), r.getResult()); - LOG.error(getGroupId() + ": writeChunk writeStateMachineData failed: blockId" + + return raftFuture; + } + + private void handleCommandResult(ContainerCommandRequestProto requestProto, long entryIndex, long startTime, + ContainerCommandResponseProto r, WriteChunkRequestProto write, + CompletableFuture raftFuture) { + if (r.getResult() != ContainerProtos.Result.SUCCESS + && r.getResult() != ContainerProtos.Result.CONTAINER_NOT_OPEN + && r.getResult() != ContainerProtos.Result.CLOSED_CONTAINER_IO + // After concurrent flushes are allowed on the same key, chunk file inconsistencies can happen and + // that should not crash the pipeline. + && r.getResult() != ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY) { + StorageContainerException sce = + new StorageContainerException(r.getMessage(), r.getResult()); + LOG.error(getGroupId() + ": writeChunk writeStateMachineData failed: blockId" + + write.getBlockID() + " logIndex " + entryIndex + " chunkName " + + write.getChunkData().getChunkName() + " Error message: " + + r.getMessage() + " Container Result: " + r.getResult()); + metrics.incNumWriteDataFails(); + // If the write fails currently we mark the stateMachine as unhealthy. + // This leads to pipeline close. 
Any change in that behavior requires + handling the entry for the write chunk in cache. + stateMachineHealthy.set(false); + unhealthyContainers.add(write.getBlockID().getContainerID()); + raftFuture.completeExceptionally(sce); + } else { + metrics.incNumBytesWrittenCount( + requestProto.getWriteChunk().getChunkData().getLen()); + if (LOG.isDebugEnabled()) { + LOG.debug(getGroupId() + + ": writeChunk writeStateMachineData completed: blockId" + write.getBlockID() + " logIndex " + entryIndex + " chunkName " + - write.getChunkData().getChunkName() + " Error message: " + - r.getMessage() + " Container Result: " + r.getResult()); - metrics.incNumWriteDataFails(); - // If the write fails currently we mark the stateMachine as unhealthy. - // This leads to pipeline close. Any change in that behavior requires - // handling the entry for the write chunk in cache. - stateMachineHealthy.set(false); - unhealthyContainers.add(write.getBlockID().getContainerID()); - raftFuture.completeExceptionally(sce); - } else { - metrics.incNumBytesWrittenCount( - requestProto.getWriteChunk().getChunkData().getLen()); - if (LOG.isDebugEnabled()) { - LOG.debug(getGroupId() + - ": writeChunk writeStateMachineData completed: blockId" + - write.getBlockID() + " logIndex " + entryIndex + " chunkName " + - write.getChunkData().getChunkName()); - } - raftFuture.complete(r::toByteString); - metrics.recordWriteStateMachineCompletionNs( - Time.monotonicNowNanos() - startTime); + write.getChunkData().getChunkName()); } + raftFuture.complete(r::toByteString); + metrics.recordWriteStateMachineCompletionNs( + Time.monotonicNowNanos() - startTime); + } + } - writeChunkFutureMap.remove(entryIndex); - return r; - }); - return raftFuture; + private void validateLongRunningWrite() throws StorageContainerException { + // get the oldest (minimum log index) pending write chunk operation's future context + Map.Entry writeFutureContextEntry = null; + for (boolean found = false; !found;) { + writeFutureContextEntry = writeChunkFutureMap.firstEntry(); + if (null == writeFutureContextEntry) { + return; + } + if (writeFutureContextEntry.getValue().getWriteChunkFuture().isDone()) { + // writeChunkFutureMap may hold a dangling completed entry, because the task's remove can run before the put + writeChunkFutureMap.remove(writeFutureContextEntry.getKey()); + } else { + found = true; + } + } + // validate the wait time against the timeout threshold, in nanoseconds + long waitTime = Time.monotonicNowNanos() - writeFutureContextEntry.getValue().getStartTime(); + if (waitTime > writeChunkWaitMaxNs) { + LOG.error("Write chunk has taken {}ns crossing threshold {}ns for index {} groupId {}, " + + "cancelling pending write chunk for this group", waitTime, writeChunkWaitMaxNs, + writeFutureContextEntry.getKey(), getGroupId()); + stateMachineHealthy.set(false); + writeChunkFutureMap.forEach((key, value) -> { + value.getWriteChunkFuture().cancel(true); + }); + throw new StorageContainerException("Write chunk has taken " + waitTime + "ns crossing threshold " + + writeChunkWaitMaxNs + "ns for index " + writeFutureContextEntry.getKey() + " groupId " + getGroupId(), + ContainerProtos.Result.CONTAINER_INTERNAL_ERROR); + } } private StateMachine.DataChannel getStreamDataChannel( @@ -819,9 +896,13 @@ private ByteString readStateMachineData( */ @Override public CompletableFuture flush(long index) { - return CompletableFuture.allOf( - writeChunkFutureMap.entrySet().stream().filter(x -> x.getKey() <= index) - .map(Map.Entry::getValue).toArray(CompletableFuture[]::new)); + final SortedMap head =
writeChunkFutureMap.headMap(index, true); + if (head.isEmpty()) { + return CompletableFuture.completedFuture(null); + } + return CompletableFuture.allOf(head.values().stream() + .map(WriteFutures::getRaftFuture) + .toArray(CompletableFuture[]::new)); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java index 057d96204a8..4694850b936 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java @@ -493,7 +493,7 @@ private SortedMap getBlockDataMap(long containerID, SortedMap resultMap = new TreeMap<>(); Token containerToken = - tokenHelper.getContainerToken(new ContainerID(containerID)); + tokenHelper.getContainerToken(ContainerID.valueOf(containerID)); Iterator> iterator = sourceNodeMap.entrySet().iterator(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java index 5d3c001eaf7..415a5fa58c9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java @@ -305,7 +305,7 @@ private byte[] innerUnpack(InputStream input, Path dbRoot, Path chunksRoot) descriptorFileContent = readEntry(archiveInput, size); } else { throw new IllegalArgumentException( - "Unknown entry in the tar file: " + "" + name); + "Unknown entry in the tar file: " + name); } entry = archiveInput.getNextEntry(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java index 2d0ed82d902..ffe5a40fb49 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java @@ -69,7 +69,7 @@ RegisteredCommand register(DatanodeDetails datanodeDetails, * @param datanodeDetails - Datanode ID. * @return Commands to be sent to the datanode. */ - default List processHeartbeat(DatanodeDetails datanodeDetails) { + default List> processHeartbeat(DatanodeDetails datanodeDetails) { return processHeartbeat(datanodeDetails, null); }; @@ -80,7 +80,7 @@ default List processHeartbeat(DatanodeDetails datanodeDetails) { * heartbeating datanode. * @return Commands to be sent to the datanode. 
*/ - List processHeartbeat(DatanodeDetails datanodeDetails, + List> processHeartbeat(DatanodeDetails datanodeDetails, CommandQueueReportProto queueReport); /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java index 02e3bd3547a..ca7427db822 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java @@ -95,7 +95,7 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception { assertWriteRead(tempDir, id1); // Add certificate serial id. - id1.setCertSerialId("" + RandomUtils.nextLong()); + id1.setCertSerialId(String.valueOf(RandomUtils.secure().randomLong())); assertWriteRead(tempDir, id1); // Read should return an empty value if file doesn't exist diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java index db64c2c16bc..22a335e1594 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java @@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; @@ -30,8 +31,10 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.IOException; +import java.lang.reflect.Field; import java.util.List; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -57,6 +60,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -246,4 +250,54 @@ public void testApplyTransactionFailure(boolean failWithException) throws Execut ContainerProtos.ContainerCommandResponseProto.parseFrom(succcesfulTransaction.getContent()); assertEquals(ContainerProtos.Result.SUCCESS, resp.getResult()); } + + @Test + public void testWriteTimout() throws Exception { + RaftProtos.LogEntryProto entry = mock(RaftProtos.LogEntryProto.class); + when(entry.getTerm()).thenReturn(1L); + when(entry.getIndex()).thenReturn(1L); + RaftProtos.LogEntryProto entryNext = mock(RaftProtos.LogEntryProto.class); + when(entryNext.getTerm()).thenReturn(1L); + when(entryNext.getIndex()).thenReturn(2L); + TransactionContext trx = mock(TransactionContext.class); + ContainerStateMachine.Context context = mock(ContainerStateMachine.Context.class); + 
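The stuck write that this test provokes is detected by validateLongRunningWrite scanning writeChunkFutureMap, and that scan is cheap only because the map was switched to a ConcurrentSkipListMap keyed by log index: firstEntry() yields the oldest in-flight write for the staleness check, and headMap(index, true) yields exactly the set of writes that flush(index) has to wait for. A small self-contained sketch of those two calls with illustrative indices (not code from the patch):

import java.util.concurrent.ConcurrentSkipListMap;

public class WriteIndexMapSketch {
  public static void main(String[] args) {
    ConcurrentSkipListMap<Long, String> pending = new ConcurrentSkipListMap<>();
    pending.put(7L, "write-7");
    pending.put(3L, "write-3");
    pending.put(5L, "write-5");
    // Oldest pending write, the one compared against writeChunkWaitMaxNs.
    System.out.println(pending.firstEntry().getKey());       // 3
    // Everything a flush up to index 5 must wait for, in log order.
    System.out.println(pending.headMap(5L, true).keySet());  // [3, 5]
  }
}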
when(trx.getStateMachineContext()).thenReturn(context); + doAnswer(e -> { + try { + Thread.sleep(200000); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw ie; + } + return null; + }).when(dispatcher).dispatch(any(), any()); + + when(context.getRequestProto()).thenReturn(ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.WriteChunk).setWriteChunk( + ContainerProtos.WriteChunkRequestProto.newBuilder().setData(ByteString.copyFromUtf8("Test Data")) + .setBlockID( + ContainerProtos.DatanodeBlockID.newBuilder().setContainerID(1).setLocalID(1).build()).build()) + .setContainerID(1) + .setDatanodeUuid(UUID.randomUUID().toString()).build()); + AtomicReference throwable = new AtomicReference<>(null); + Function throwableSetter = t -> { + throwable.set(t); + return null; + }; + Field writeChunkWaitMaxNs = stateMachine.getClass().getDeclaredField("writeChunkWaitMaxNs"); + writeChunkWaitMaxNs.setAccessible(true); + writeChunkWaitMaxNs.set(stateMachine, 1000_000_000); + CompletableFuture firstWrite = stateMachine.write(entry, trx); + Thread.sleep(2000); + CompletableFuture secondWrite = stateMachine.write(entryNext, trx); + firstWrite.exceptionally(throwableSetter).get(); + assertNotNull(throwable.get()); + assertInstanceOf(InterruptedException.class, throwable.get()); + + secondWrite.exceptionally(throwableSetter).get(); + assertNotNull(throwable.get()); + assertInstanceOf(StorageContainerException.class, throwable.get()); + StorageContainerException sce = (StorageContainerException) throwable.get(); + assertEquals(ContainerProtos.Result.CONTAINER_INTERNAL_ERROR, sce.getResult()); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java index c17ef1bfb62..e98be4881a5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java @@ -29,6 +29,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.JsonTestUtils; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.utils.db.BatchOperation; @@ -511,7 +512,7 @@ private JsonNode runInspectorAndGetReport( String output = capturer.getOutput(); capturer.clearOutput(); // Check if the output is effectively empty - if (output.trim().isEmpty()) { + if (StringUtils.isBlank(output)) { return null; } return JsonTestUtils.readTree(output); diff --git a/hadoop-hdds/docs/content/tools/AuditParser.md b/hadoop-hdds/docs/content/tools/AuditParser.md index ee2acd959c5..ebc5c8ae6a8 100644 --- a/hadoop-hdds/docs/content/tools/AuditParser.md +++ b/hadoop-hdds/docs/content/tools/AuditParser.md @@ -40,25 +40,25 @@ UNIQUE(datetime,level,logger,user,ip,op,params,result)) Usage: {{< highlight bash >}} -ozone auditparser [COMMAND] [PARAM] +ozone debug auditparser [COMMAND] [PARAM] {{< /highlight >}} To load an audit log to database: {{< highlight bash >}} -ozone auditparser load +ozone debug auditparser load {{< /highlight >}} Load command creates the audit table described above. 
To run a custom read-only query: {{< highlight bash >}} -ozone auditparser query {{< /highlight >}} Audit Parser comes with a set of templates(most commonly used queries). To run a template query: {{< highlight bash >}} -ozone auditparser template +ozone debug auditparser template {{< /highlight >}} Following templates are available: diff --git a/hadoop-hdds/docs/content/tools/AuditParser.zh.md b/hadoop-hdds/docs/content/tools/AuditParser.zh.md index 10786d5174a..ffe0617328a 100644 --- a/hadoop-hdds/docs/content/tools/AuditParser.zh.md +++ b/hadoop-hdds/docs/content/tools/AuditParser.zh.md @@ -38,25 +38,25 @@ UNIQUE(datetime,level,logger,user,ip,op,params,result)) 用法: {{< highlight bash >}} -ozone auditparser <数据库文件的路径> [命令] [参数] +ozone debug auditparser <数据库文件的路径> [命令] [参数] {{< /highlight >}} 将审计日志加载到数据库: {{< highlight bash >}} -ozone auditparser <数据库文件的路径> load <审计日志的路径> +ozone debug auditparser <数据库文件的路径> load <审计日志的路径> {{< /highlight >}} Load 命令会创建如上所述的审计表。 运行一个自定义的只读查询: {{< highlight bash >}} -ozone auditparser <数据库文件的路径> query <双引号括起来的 select 查询> +ozone debug auditparser <数据库文件的路径> query <双引号括起来的 select 查询> {{< /highlight >}} 审计解析起自带了一些模板(最常用的查询) 运行模板查询: {{< highlight bash >}} -ozone auditparser <数据库文件的路径 template <模板名称> +ozone debug auditparser <数据库文件的路径 template <模板名称> {{< /highlight >}} Ozone 提供了以下模板: diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/ProfileServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/ProfileServlet.java index ad9c7315ba4..1d1d5e06e1a 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/ProfileServlet.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/ProfileServlet.java @@ -37,6 +37,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -162,7 +163,7 @@ private Integer getPid() { // in case if it is not set correctly used fallback from mxbean which is // implementation specific - if (pidStr == null || pidStr.trim().isEmpty()) { + if (StringUtils.isBlank(pidStr)) { String name = ManagementFactory.getRuntimeMXBean().getName(); if (name != null) { int idx = name.indexOf("@"); @@ -217,7 +218,7 @@ protected static String validateFileName(String filename) { protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) throws IOException { // make sure async profiler home is set - if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + if (StringUtils.isBlank(asyncProfilerHome)) { resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); setResponseHeader(resp); resp.getWriter().write("ASYNC_PROFILER_HOME env is not set."); @@ -272,7 +273,7 @@ protected void doGet(final HttpServletRequest req, cmd.add("-e"); cmd.add(event.getInternalName()); cmd.add("-d"); - cmd.add("" + duration); + cmd.add(String.valueOf(duration)); cmd.add("-o"); cmd.add(output.name().toLowerCase()); cmd.add("-f"); @@ -469,7 +470,7 @@ static String getAsyncProfilerHome() { String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV); // if ENV is not set, see if -Dasync.profiler // .home=/path/to/async/profiler/home is set - if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + if (StringUtils.isBlank(asyncProfilerHome)) { asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY); } diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/CodecRegistry.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/CodecRegistry.java index a8a95400213..82fa687ccca 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/CodecRegistry.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/CodecRegistry.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.commons.lang3.ClassUtils; /** @@ -61,6 +62,7 @@ private CodecMap(Map, Codec> map) { } Codec get(Class clazz) { + Objects.requireNonNull(clazz, "clazz == null"); final Codec codec = map.get(clazz); return (Codec) codec; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java index deef96f317f..4d83acba39e 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java @@ -73,6 +73,19 @@ Table getTable(String name, Class keyType, Class valueType, TableCache.CacheType cacheType) throws IOException; + /** + * Gets a table store with implicit key/value conversion. + * + * @param name - table name + * @param keyCodec - key codec + * @param valueCodec - value codec + * @param cacheType - cache type + * @return - Table Store + * @throws IOException + */ + TypedTable getTable( + String name, Codec keyCodec, Codec valueCodec, TableCache.CacheType cacheType) throws IOException; + /** * Lists the Known list of Tables in a DB. * diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java index b46a742ac3d..5e41f5a854e 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java @@ -107,7 +107,6 @@ public final class DBStoreBuilder { // number in request to avoid increase in heap memory. private long maxDbUpdatesSizeThreshold; private Integer maxNumberOfOpenFiles = null; - private String threadNamePrefix = ""; /** * Create DBStoreBuilder from a generic DBDefinition. @@ -233,7 +232,7 @@ public DBStore build() throws IOException { return new RDBStore(dbFile, rocksDBOption, statistics, writeOptions, tableConfigs, registry.build(), openReadOnly, maxFSSnapshots, dbJmxBeanNameName, enableCompactionDag, maxDbUpdatesSizeThreshold, createCheckpointDirs, - configuration, threadNamePrefix, enableRocksDbMetrics); + configuration, enableRocksDbMetrics); } finally { tableConfigs.forEach(TableConfig::close); } @@ -323,11 +322,6 @@ public DBStoreBuilder setMaxNumberOfOpenFiles(Integer maxNumberOfOpenFiles) { return this; } - public DBStoreBuilder setThreadNamePrefix(String prefix) { - this.threadNamePrefix = prefix; - return this; - } - /** * Converts column families and their corresponding options that have been * registered with the builder to a set of {@link TableConfig} objects.
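A hedged usage sketch of the codec-based getTable overload added to DBStore above, combined with the NO_CACHE cache type introduced later in this patch; the table name, keys and helper class are illustrative, and the snippet assumes an already-built DBStore. With NO_CACHE, cache lookups always answer MAY_EXIST, so reads go straight to RocksDB:

import java.io.IOException;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.StringCodec;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.cache.TableCache;

public final class CodecTableExample {
  private CodecTableExample() { }

  static String readThrough(DBStore store) throws IOException {
    // Codecs replace the Class-based registry lookup; NO_CACHE skips the table cache entirely.
    Table<String, String> table = store.getTable(
        "exampleTable", StringCodec.get(), StringCodec.get(), TableCache.CacheType.NO_CACHE);
    table.put("volume1", "metadata");
    return table.get("volume1");  // served from RocksDB, not from a cache layer
  }
}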
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java index 40d3507d2eb..e3ecaca6386 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java @@ -74,7 +74,6 @@ public class RDBStore implements DBStore { private final long maxDbUpdatesSizeThreshold; private final ManagedDBOptions dbOptions; private final ManagedStatistics statistics; - private final String threadNamePrefix; @SuppressWarnings("parameternumber") public RDBStore(File dbFile, ManagedDBOptions dbOptions, ManagedStatistics statistics, @@ -83,11 +82,10 @@ public RDBStore(File dbFile, ManagedDBOptions dbOptions, ManagedStatistics stati String dbJmxBeanName, boolean enableCompactionDag, long maxDbUpdatesSizeThreshold, boolean createCheckpointDirs, - ConfigurationSource configuration, String threadNamePrefix, + ConfigurationSource configuration, boolean enableRocksDBMetrics) throws IOException { - this.threadNamePrefix = threadNamePrefix; Preconditions.checkNotNull(dbFile, "DB file location cannot be null"); Preconditions.checkNotNull(families); Preconditions.checkArgument(!families.isEmpty()); @@ -302,12 +300,17 @@ public TypedTable getTable(String name, valueType); } + @Override + public TypedTable getTable( + String name, Codec keyCodec, Codec valueCodec, TableCache.CacheType cacheType) throws IOException { + return new TypedTable<>(getTable(name), keyCodec, valueCodec, cacheType); + } + @Override public Table getTable(String name, Class keyType, Class valueType, TableCache.CacheType cacheType) throws IOException { - return new TypedTable<>(getTable(name), codecRegistry, keyType, - valueType, cacheType, threadNamePrefix); + return new TypedTable<>(getTable(name), codecRegistry, keyType, valueType, cacheType); } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java index 24676ac33b5..f39d55327aa 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdds.utils.db.cache.PartialTableCache; import org.apache.hadoop.hdds.utils.db.cache.TableCache; import org.apache.hadoop.hdds.utils.db.cache.TableCache.CacheType; +import org.apache.hadoop.hdds.utils.db.cache.TableNoCache; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.function.CheckedBiFunction; @@ -57,10 +58,9 @@ public class TypedTable implements Table { static final int BUFFER_SIZE_DEFAULT = 4 << 10; // 4 KB private final RDBTable rawTable; + private final String info; - private final Class keyType; private final Codec keyCodec; - private final Class valueType; private final Codec valueCodec; private final boolean supportCodecBuffer; @@ -72,11 +72,9 @@ public class TypedTable implements Table { * The same as this(rawTable, codecRegistry, keyType, valueType, * CacheType.PARTIAL_CACHE). 
*/ - public TypedTable(RDBTable rawTable, - CodecRegistry codecRegistry, Class keyType, - Class valueType) throws IOException { - this(rawTable, codecRegistry, keyType, valueType, - CacheType.PARTIAL_CACHE, ""); + TypedTable(RDBTable rawTable, CodecRegistry codecRegistry, Class keyType, Class valueType) + throws IOException { + this(rawTable, codecRegistry, keyType, valueType, CacheType.PARTIAL_CACHE); } /** @@ -87,32 +85,40 @@ public TypedTable(RDBTable rawTable, * @param keyType The key type. * @param valueType The value type. * @param cacheType How to cache the entries? - * @param threadNamePrefix * @throws IOException if failed to iterate the raw table. */ - public TypedTable(RDBTable rawTable, - CodecRegistry codecRegistry, Class keyType, - Class valueType, - CacheType cacheType, String threadNamePrefix) throws IOException { - this.rawTable = Objects.requireNonNull(rawTable, "rawTable==null"); - Objects.requireNonNull(codecRegistry, "codecRegistry == null"); + TypedTable(RDBTable rawTable, CodecRegistry codecRegistry, Class keyType, Class valueType, + CacheType cacheType) throws IOException { + this(rawTable, codecRegistry.getCodecFromClass(keyType), codecRegistry.getCodecFromClass(valueType), + cacheType); + } - this.keyType = Objects.requireNonNull(keyType, "keyType == null"); - this.keyCodec = codecRegistry.getCodecFromClass(keyType); - Objects.requireNonNull(keyCodec, "keyCodec == null"); + /** + * Create an TypedTable from the raw table with specified cache type. + * + * @param rawTable The underlying (untyped) table in RocksDB. + * @param keyCodec The key codec. + * @param valueCodec The value codec. + * @param cacheType How to cache the entries? + * @throws IOException + */ + public TypedTable( + RDBTable rawTable, Codec keyCodec, Codec valueCodec, CacheType cacheType) throws IOException { + this.rawTable = Objects.requireNonNull(rawTable, "rawTable==null"); + this.keyCodec = Objects.requireNonNull(keyCodec, "keyCodec == null"); + this.valueCodec = Objects.requireNonNull(valueCodec, "valueCodec == null"); - this.valueType = Objects.requireNonNull(valueType, "valueType == null"); - this.valueCodec = codecRegistry.getCodecFromClass(valueType); - Objects.requireNonNull(valueCodec, "valueCodec == null"); + this.info = getClassSimpleName(getClass()) + "-" + getName() + "(" + getClassSimpleName(keyCodec.getTypeClass()) + + "->" + getClassSimpleName(valueCodec.getTypeClass()) + ")"; this.supportCodecBuffer = keyCodec.supportCodecBuffer() && valueCodec.supportCodecBuffer(); + final String threadNamePrefix = rawTable.getName() + "_"; if (cacheType == CacheType.FULL_CACHE) { cache = new FullTableCache<>(threadNamePrefix); //fill cache - try (TableIterator> tableIterator = - iterator()) { + try (TableIterator> tableIterator = iterator()) { while (tableIterator.hasNext()) { KeyValue< KEY, VALUE > kv = tableIterator.next(); @@ -124,8 +130,10 @@ public TypedTable(RDBTable rawTable, CacheValue.get(EPOCH_DEFAULT, kv.getValue())); } } - } else { + } else if (cacheType == CacheType.PARTIAL_CACHE) { cache = new PartialTableCache<>(threadNamePrefix); + } else { + cache = TableNoCache.instance(); } } @@ -443,9 +451,7 @@ public String getName() { @Override public String toString() { - return getClassSimpleName(getClass()) + "-" + getName() - + "(" + getClassSimpleName(keyType) - + "->" + getClassSimpleName(valueType) + ")"; + return info; } @Override @@ -572,14 +578,6 @@ public KEY getKey() throws IOException { public VALUE getValue() throws IOException { return decodeValue(rawKeyValue.getValue()); 
} - - public byte[] getRawKey() throws IOException { - return rawKeyValue.getKey(); - } - - public byte[] getRawValue() throws IOException { - return rawKeyValue.getValue(); - } } RawIterator newCodecBufferTableIterator( diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java index 62c80a6f787..982c820ada3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java @@ -161,8 +161,7 @@ public CacheResult lookup(CacheKey cachekey) { CacheValue cachevalue = cache.get(cachekey); statsRecorder.recordValue(cachevalue); if (cachevalue == null) { - return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST, - null); + return (CacheResult) MAY_EXIST; } else { if (cachevalue.getCacheValue() != null) { return new CacheResult<>(CacheResult.CacheStatus.EXISTS, cachevalue); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java index d26778459c7..5d3782f76e1 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java @@ -34,6 +34,7 @@ @Private @Evolving public interface TableCache { + CacheResult MAY_EXIST = new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST, null); /** * Return the value for the key if it is present, otherwise return null. @@ -113,7 +114,8 @@ public interface TableCache { enum CacheType { FULL_CACHE, // This mean's the table maintains full cache. Cache and DB // state are same. - PARTIAL_CACHE // This is partial table cache, cache state is partial state + PARTIAL_CACHE, // This is partial table cache, cache state is partial state // compared to DB state. + NO_CACHE } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableNoCache.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableNoCache.java new file mode 100644 index 00000000000..17bb961ac3d --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableNoCache.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.utils.db.cache; + +import com.google.common.annotations.VisibleForTesting; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import org.apache.hadoop.hdds.annotation.InterfaceAudience.Private; +import org.apache.hadoop.hdds.annotation.InterfaceStability.Evolving; + +/** + * Dummy cache implementation for the table, means key/value are not cached. + * @param + * @param + */ +@Private +@Evolving +public final class TableNoCache implements TableCache { + public static final CacheStats EMPTY_STAT = new CacheStats(0, 0, 0); + + private static final TableCache NO_CACHE_INSTANCE = new TableNoCache<>(); + public static TableCache instance() { + return (TableCache) NO_CACHE_INSTANCE; + } + + private TableNoCache() { + } + + @Override + public CacheValue get(CacheKey cachekey) { + return null; + } + + @Override + public void loadInitial(CacheKey key, CacheValue value) { + } + + @Override + public void put(CacheKey cacheKey, CacheValue value) { + } + + @Override + public void cleanup(List epochs) { + } + + @Override + public int size() { + return 0; + } + + @Override + public Iterator, CacheValue>> iterator() { + return Collections.emptyIterator(); + } + + @VisibleForTesting + @Override + public void evictCache(List epochs) { + } + + @Override + public CacheResult lookup(CacheKey cachekey) { + return (CacheResult) MAY_EXIST; + } + + @VisibleForTesting + @Override + public NavigableMap>> getEpochEntries() { + return Collections.emptyNavigableMap(); + } + + @Override + public CacheStats getStats() { + return EMPTY_STAT; + } + + @Override + public CacheType getCacheType() { + return CacheType.NO_CACHE; + } +} diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java index 24d59b5be06..81626f93575 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java @@ -62,7 +62,7 @@ public static RDBStore newRDBStore(File dbFile, ManagedDBOptions options, throws IOException { return new RDBStore(dbFile, options, null, new ManagedWriteOptions(), families, CodecRegistry.newBuilder().build(), false, 1000, null, false, - maxDbUpdatesSizeThreshold, true, null, "", true); + maxDbUpdatesSizeThreshold, true, null, true); } public static final int MAX_DB_UPDATES_SIZE_THRESHOLD = 80; diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java index 0ba41fdfa5c..37f81369f91 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java @@ -28,6 +28,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import com.google.protobuf.ByteString; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -41,6 +42,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.utils.MetadataKeyFilters; +import org.apache.hadoop.hdds.utils.db.cache.TableCache; import 
org.apache.hadoop.hdds.utils.db.managed.ManagedColumnFamilyOptions; import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions; import org.junit.jupiter.api.AfterEach; @@ -68,7 +70,8 @@ public class TestRDBTableStore { "First", "Second", "Third", "Fourth", "Fifth", "Sixth", "Seventh", - "Eighth", "Ninth"); + "Eighth", "Ninth", + "Ten"); private final List prefixedFamilies = Arrays.asList( "PrefixFirst", "PrefixTwo", "PrefixThree", @@ -304,6 +307,19 @@ public void batchDelete() throws Exception { } } + @Test + public void putGetTypedTableCodec() throws Exception { + try (Table testTable = rdbStore.getTable("Ten", String.class, String.class)) { + testTable.put("test1", "123"); + assertFalse(testTable.isEmpty()); + assertEquals("123", testTable.get("test1")); + } + try (Table testTable = rdbStore.getTable("Ten", + StringCodec.get(), ByteStringCodec.get(), TableCache.CacheType.NO_CACHE)) { + assertEquals("123", testTable.get("test1").toStringUtf8()); + } + } + @Test public void forEachAndIterator() throws Exception { final int iterCount = 100; diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java index 46c3cae975c..7a1689a79a7 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.utils.db.cache; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.fail; @@ -25,10 +26,12 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; +import java.util.stream.Stream; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.event.Level; /** @@ -46,13 +49,18 @@ public static void setLogLevel() { private void createTableCache(TableCache.CacheType cacheType) { if (cacheType == TableCache.CacheType.FULL_CACHE) { tableCache = new FullTableCache<>(""); - } else { + } else if (cacheType == TableCache.CacheType.PARTIAL_CACHE) { tableCache = new PartialTableCache<>(""); + } else { + tableCache = TableNoCache.instance(); } } + private static Stream cacheTypeList() { + return Stream.of(TableCache.CacheType.FULL_CACHE, TableCache.CacheType.PARTIAL_CACHE); + } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testPartialTableCache(TableCache.CacheType cacheType) { createTableCache(cacheType); @@ -96,7 +104,7 @@ private void verifyStats(TableCache cache, } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testTableCacheWithRenameKey(TableCache.CacheType cacheType) { createTableCache(cacheType); @@ -152,7 +160,7 @@ public void testTableCacheWithRenameKey(TableCache.CacheType cacheType) { } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testPartialTableCacheWithNotContinuousEntries( 
TableCache.CacheType cacheType) { @@ -203,7 +211,7 @@ public void testPartialTableCacheWithNotContinuousEntries( } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testPartialTableCacheWithOverrideEntries( TableCache.CacheType cacheType) { @@ -274,7 +282,7 @@ public void testPartialTableCacheWithOverrideEntries( } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testPartialTableCacheWithOverrideAndDelete( TableCache.CacheType cacheType) { @@ -371,7 +379,7 @@ public void testPartialTableCacheWithOverrideAndDelete( } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testPartialTableCacheParallel( TableCache.CacheType cacheType) throws Exception { @@ -455,7 +463,7 @@ public void testPartialTableCacheParallel( } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testTableCache(TableCache.CacheType cacheType) { createTableCache(cacheType); @@ -488,7 +496,7 @@ public void testTableCache(TableCache.CacheType cacheType) { @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testTableCacheWithNonConsecutiveEpochList( TableCache.CacheType cacheType) { @@ -559,7 +567,7 @@ public void testTableCacheWithNonConsecutiveEpochList( } @ParameterizedTest - @EnumSource(TableCache.CacheType.class) + @MethodSource("cacheTypeList") public void testTableCacheStats(TableCache.CacheType cacheType) { createTableCache(cacheType); @@ -581,6 +589,18 @@ public void testTableCacheStats(TableCache.CacheType cacheType) { verifyStats(tableCache, 3, 2, 2); } + @Test + public void testNoCache() { + createTableCache(TableCache.CacheType.NO_CACHE); + tableCache.put(new CacheKey<>("0"), CacheValue.get(0, "0")); + assertNull(tableCache.get(new CacheKey<>("0"))); + assertEquals(tableCache.getCacheType(), TableCache.CacheType.NO_CACHE); + assertEquals(0, tableCache.size()); + assertEquals(0, tableCache.getEpochEntries().size()); + assertFalse(tableCache.iterator().hasNext()); + verifyStats(tableCache, 0, 0, 0); + } + private int writeToCache(int count, int startVal, long sleep) throws InterruptedException { int counter = 1; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java index 5998be91833..e8c0178612e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java @@ -329,25 +329,17 @@ public long getNumCommandsDatanodeFailed() { @Override public String toString() { - StringBuffer buffer = new StringBuffer(); - buffer.append("numBlockDeletionTransactionCreated = " - + numBlockDeletionTransactionCreated.value()).append("\t") - .append("numBlockDeletionTransactionCompleted = " - + numBlockDeletionTransactionCompleted.value()).append("\t") - .append("numBlockDeletionCommandSent = " - + numBlockDeletionCommandSent.value()).append("\t") - .append("numBlockDeletionCommandSuccess = " - + numBlockDeletionCommandSuccess.value()).append("\t") - .append("numBlockDeletionCommandFailure = " - + numBlockDeletionCommandFailure.value()).append("\t") - .append("numBlockDeletionTransactionSent = " - + 
numBlockDeletionTransactionSent.value()).append("\t") - .append("numBlockDeletionTransactionSuccess = " - + numBlockDeletionTransactionSuccess.value()).append("\t") - .append("numBlockDeletionTransactionFailure = " - + numBlockDeletionTransactionFailure.value()).append("\t") - .append("numDeletionCommandsPerDatanode = " - + numCommandsDatanode); + StringBuilder buffer = new StringBuilder() + .append("numBlockDeletionTransactionCreated = ").append(numBlockDeletionTransactionCreated.value()).append("\t") + .append("numBlockDeletionTransactionCompleted = ") + .append(numBlockDeletionTransactionCompleted.value()).append("\t") + .append("numBlockDeletionCommandSent = ").append(numBlockDeletionCommandSent.value()).append("\t") + .append("numBlockDeletionCommandSuccess = ").append(numBlockDeletionCommandSuccess.value()).append("\t") + .append("numBlockDeletionCommandFailure = ").append(numBlockDeletionCommandFailure.value()).append("\t") + .append("numBlockDeletionTransactionSent = ").append(numBlockDeletionTransactionSent.value()).append("\t") + .append("numBlockDeletionTransactionSuccess = ").append(numBlockDeletionTransactionSuccess.value()).append("\t") + .append("numBlockDeletionTransactionFailure = ").append(numBlockDeletionTransactionFailure.value()).append("\t") + .append("numDeletionCommandsPerDatanode = ").append(numCommandsDatanode); return buffer.toString(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index 113f620ebcd..813c91c2398 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -17,16 +17,11 @@ package org.apache.hadoop.hdds.scm.container; -import static java.util.Comparator.reverseOrder; import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.CONTAINER_ID; -import static org.apache.hadoop.hdds.utils.CollectionUtils.findTopN; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -144,21 +139,20 @@ public ContainerInfo getContainer(final ContainerID id) @Override public List getContainers(ReplicationType type) { - return toContainers(containerStateManager.getContainerIDs(type)); + return containerStateManager.getContainerInfos(type); } @Override public List getContainers(final ContainerID startID, final int count) { scmContainerManagerMetrics.incNumListContainersOps(); - return toContainers(filterSortAndLimit(startID, count, - containerStateManager.getContainerIDs())); + return containerStateManager.getContainerInfos(startID, count); } @Override public List getContainers(final LifeCycleState state) { scmContainerManagerMetrics.incNumListContainersOps(); - return toContainers(containerStateManager.getContainerIDs(state)); + return containerStateManager.getContainerInfos(state); } @Override @@ -166,13 +160,12 @@ public List getContainers(final ContainerID startID, final int count, final LifeCycleState state) { scmContainerManagerMetrics.incNumListContainersOps(); - return toContainers(filterSortAndLimit(startID, count, - containerStateManager.getContainerIDs(state))); + return 
containerStateManager.getContainerInfos(state, startID, count); } @Override public int getContainerStateCount(final LifeCycleState state) { - return containerStateManager.getContainerIDs(state).size(); + return containerStateManager.getContainerCount(state); } @Override @@ -318,7 +311,7 @@ public void updateContainerReplica(final ContainerID cid, final ContainerReplica replica) throws ContainerNotFoundException { if (containerExist(cid)) { - containerStateManager.updateContainerReplica(cid, replica); + containerStateManager.updateContainerReplica(replica); } else { throwContainerNotFoundException(cid); } @@ -329,7 +322,7 @@ public void removeContainerReplica(final ContainerID cid, final ContainerReplica replica) throws ContainerNotFoundException, ContainerReplicaNotFoundException { if (containerExist(cid)) { - containerStateManager.removeContainerReplica(cid, replica); + containerStateManager.removeContainerReplica(replica); } else { throwContainerNotFoundException(cid); } @@ -469,33 +462,4 @@ public ContainerStateManager getContainerStateManager() { public SCMHAManager getSCMHAManager() { return haManager; } - - private static List filterSortAndLimit( - ContainerID startID, int count, Set set) { - - if (ContainerID.MIN.equals(startID) && count >= set.size()) { - List list = new ArrayList<>(set); - Collections.sort(list); - return list; - } - - return findTopN(set, count, reverseOrder(), - id -> id.compareTo(startID) >= 0); - } - - /** - * Returns a list of all containers identified by {@code ids}. - */ - private List toContainers(Collection ids) { - List containers = new ArrayList<>(ids.size()); - - for (ContainerID id : ids) { - ContainerInfo container = containerStateManager.getContainer(id); - if (container != null) { - containers.add(container); - } - } - - return containers; - } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java index 99ddba2aa46..43267d42657 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java @@ -54,6 +54,10 @@ private ContainerReplica(ContainerReplicaBuilder b) { sequenceId = b.sequenceId; } + public ContainerID getContainerID() { + return containerID; + } + /** * Returns the DatanodeDetails to which this replica belongs. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index 0ec9c2593ec..cee661e87d0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.container; import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.NavigableSet; import java.util.Set; @@ -103,22 +104,33 @@ public interface ContainerStateManager { boolean contains(ContainerID containerID); /** - * Returns the ID of all the managed containers. + * Get {@link ContainerInfo}s. 
* - * @return Set of {@link ContainerID} + * @param start the start {@link ContainerID} (inclusive) + * @param count the size limit + * @return a list of {@link ContainerInfo}; */ - Set getContainerIDs(); + List getContainerInfos(ContainerID start, int count); /** + * Get {@link ContainerInfo}s for the given state. * + * @param start the start {@link ContainerID} (inclusive) + * @param count the size limit + * @return a list of {@link ContainerInfo}; */ - Set getContainerIDs(LifeCycleState state); + List getContainerInfos(LifeCycleState state, ContainerID start, int count); + /** @return all {@link ContainerInfo}s for the given state. */ + List getContainerInfos(LifeCycleState state); /** - * Returns the IDs of the Containers whose ReplicationType matches the given type. + * @return number of containers for the given state. */ - Set getContainerIDs(ReplicationType type); + int getContainerCount(LifeCycleState state); + + /** @return all {@link ContainerInfo}s for the given type. */ + List getContainerInfos(ReplicationType type); /** * @@ -133,14 +145,12 @@ public interface ContainerStateManager { /** * */ - void updateContainerReplica(ContainerID id, - ContainerReplica replica); + void updateContainerReplica(ContainerReplica replica); /** * */ - void removeContainerReplica(ContainerID id, - ContainerReplica replica); + void removeContainerReplica(ContainerReplica replica); /** * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 693eed710bb..5ada3ecbbeb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -32,13 +32,14 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE_DEFAULT; -import com.google.common.base.Preconditions; import com.google.common.util.concurrent.Striped; import java.io.IOException; import java.util.EnumMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.NavigableSet; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReadWriteLock; @@ -239,7 +240,7 @@ private void initialize() throws IOException { while (iterator.hasNext()) { final ContainerInfo container = iterator.next().getValue(); - Preconditions.checkNotNull(container); + Objects.requireNonNull(container, "container == null"); containers.addContainer(container); if (container.getState() == LifeCycleState.OPEN) { try { @@ -268,23 +269,37 @@ private void initialize() throws IOException { } @Override - public Set getContainerIDs() { + public List getContainerInfos(ContainerID start, int count) { try (AutoCloseableLock ignored = readLock()) { - return containers.getAllContainerIDs(); + return containers.getContainerInfos(start, count); } } @Override - public Set getContainerIDs(final LifeCycleState state) { + public List getContainerInfos(LifeCycleState state, ContainerID start, int count) { try (AutoCloseableLock ignored = readLock()) { - return containers.getContainerIDsByState(state); + return containers.getContainerInfos(state, start, count); } } @Override - public Set getContainerIDs(final ReplicationType type) 
{ + public List getContainerInfos(final LifeCycleState state) { try (AutoCloseableLock ignored = readLock()) { - return containers.getContainerIDsByType(type); + return containers.getContainerInfos(state); + } + } + + @Override + public List getContainerInfos(ReplicationType type) { + try (AutoCloseableLock ignored = readLock()) { + return containers.getContainerInfos(type); + } + } + + @Override + public int getContainerCount(final LifeCycleState state) { + try (AutoCloseableLock ignored = readLock()) { + return containers.getContainerCount(state); } } @@ -303,7 +318,7 @@ public void addContainer(final ContainerInfoProto containerInfo) // ClosedPipelineException once ClosedPipelineException is introduced // in PipelineManager. - Preconditions.checkNotNull(containerInfo); + Objects.requireNonNull(containerInfo, "containerInfo == null"); final ContainerInfo container = ContainerInfo.fromProtobuf(containerInfo); final ContainerID containerID = container.containerID(); final PipelineID pipelineID = container.getPipelineID(); @@ -400,10 +415,10 @@ public Set getContainerReplicas(final ContainerID id) { } @Override - public void updateContainerReplica(final ContainerID id, - final ContainerReplica replica) { + public void updateContainerReplica(final ContainerReplica replica) { + final ContainerID id = replica.getContainerID(); try (AutoCloseableLock ignored = writeLock(id)) { - containers.updateContainerReplica(id, replica); + containers.updateContainerReplica(replica); // Clear any pending additions for this replica as we have now seen it. containerReplicaPendingOps.completeAddReplica(id, replica.getDatanodeDetails(), replica.getReplicaIndex()); @@ -411,10 +426,10 @@ public void updateContainerReplica(final ContainerID id, } @Override - public void removeContainerReplica(final ContainerID id, - final ContainerReplica replica) { + public void removeContainerReplica(final ContainerReplica replica) { + final ContainerID id = replica.getContainerID(); try (AutoCloseableLock ignored = writeLock(id)) { - containers.removeContainerReplica(id, replica); + containers.removeContainerReplica(id, replica.getDatanodeDetails().getID()); // Remove any pending delete replication operations for the deleted // replica. 
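The getContainerIDs call sites above are replaced by getContainerInfos overloads that apply the state filter, ordering, and the inclusive (start, count) window inside the container state map, instead of copying and sorting whole ID sets in ContainerManagerImpl. A hedged sketch of how a caller can page through containers on top of the unchanged ContainerManager API (the batch size, state, and helper class are illustrative):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;

public final class ContainerPagingExample {
  private ContainerPagingExample() { }

  static List<ContainerInfo> listClosed(ContainerManager manager) {
    final int batch = 1000;
    List<ContainerInfo> all = new ArrayList<>();
    ContainerID start = ContainerID.MIN;  // smallest possible ID; start is inclusive
    List<ContainerInfo> page;
    do {
      page = manager.getContainers(start, batch, LifeCycleState.CLOSED);
      all.addAll(page);
      if (!page.isEmpty()) {
        // next page begins just after the last container returned
        long lastId = page.get(page.size() - 1).getContainerID();
        start = ContainerID.valueOf(lastId + 1);
      }
    } while (page.size() == batch);
    return all;
  }
}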
containerReplicaPendingOps.completeDeleteReplica(id, @@ -601,9 +616,9 @@ public Builder setContainerReplicaPendingOps( } public ContainerStateManager build() throws IOException { - Preconditions.checkNotNull(conf); - Preconditions.checkNotNull(pipelineMgr); - Preconditions.checkNotNull(table); + Objects.requireNonNull(conf, "conf == null"); + Objects.requireNonNull(pipelineMgr, "pipelineMgr == null"); + Objects.requireNonNull(table, "table == null"); final ContainerStateManager csm = new ContainerStateManagerImpl( conf, pipelineMgr, table, transactionBuffer, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java index bbcf498ec56..bf4f0b92fdd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java @@ -40,7 +40,7 @@ public enum HealthState { private final ContainerInfo containerInfo; private final HealthState healthState; - private final List commands = new ArrayList<>(); + private final List> commands = new ArrayList<>(); public ContainerHealthResult(ContainerInfo containerInfo, HealthState healthState) { @@ -52,11 +52,11 @@ public HealthState getHealthState() { return healthState; } - public void addCommand(SCMCommand command) { + public void addCommand(SCMCommand command) { commands.add(command); } - public List getCommands() { + public List> getCommands() { return commands; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java new file mode 100644 index 00000000000..3ff23b495e8 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import java.io.IOException; +import java.util.Comparator; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; + +/** + * Class to correct over replicated QuasiClosed Stuck Ratis containers. + */ +public class QuasiClosedStuckOverReplicationHandler implements UnhealthyReplicationHandler { + + private static final org.slf4j.Logger LOG = + org.slf4j.LoggerFactory.getLogger(QuasiClosedStuckOverReplicationHandler.class); + private final ReplicationManager replicationManager; + private final ReplicationManagerMetrics metrics; + + public QuasiClosedStuckOverReplicationHandler(final ReplicationManager replicationManager) { + this.replicationManager = replicationManager; + this.metrics = replicationManager.getMetrics(); + } + + @Override + public int processAndSendCommands(Set replicas, List pendingOps, + ContainerHealthResult result, int remainingMaintenanceRedundancy) + throws IOException { + + ContainerInfo containerInfo = result.getContainerInfo(); + LOG.debug("Handling over replicated QuasiClosed Stuck Ratis container {}", containerInfo); + + int pendingDelete = 0; + for (ContainerReplicaOp op : pendingOps) { + if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) { + pendingDelete++; + } + } + + if (pendingDelete > 0) { + LOG.debug("Container {} has pending delete operations. No more over replication will be scheduled until they " + + "complete", containerInfo); + return 0; + } + + // Filter out any STALE replicas, as they may go dead soon. If so, we don't want to remove other healthy replicas + // instead of them, as they could result in under replication. + Set healthyReplicas = replicas.stream() + .filter(replica -> { + try { + return replicationManager.getNodeStatus( + replica.getDatanodeDetails()).getHealth() == HddsProtos.NodeState.HEALTHY; + } catch (NodeNotFoundException e) { + return false; + } + }) + .collect(Collectors.toSet()); + + QuasiClosedStuckReplicaCount replicaCount = + new QuasiClosedStuckReplicaCount(healthyReplicas, remainingMaintenanceRedundancy); + + List misReplicatedOrigins + = replicaCount.getOverReplicatedOrigins(); + + if (misReplicatedOrigins.isEmpty()) { + LOG.debug("Container {} is not over replicated", containerInfo); + return 0; + } + + int totalCommandsSent = 0; + IOException firstException = null; + for (QuasiClosedStuckReplicaCount.MisReplicatedOrigin origin : misReplicatedOrigins) { + List sortedReplicas = getSortedReplicas(origin.getSources()); + for (int i = 0; i < origin.getReplicaDelta(); i++) { + try { + replicationManager.sendThrottledDeleteCommand( + containerInfo, 0, sortedReplicas.get(i).getDatanodeDetails(), true); + totalCommandsSent++; + } catch (CommandTargetOverloadedException e) { + LOG.debug("Unable to send delete command for container {} to {} as it has too many pending delete commands", + containerInfo, sortedReplicas.get(i).getDatanodeDetails()); + firstException = e; + } + } + } + + if (firstException != null) { + // Some nodes were overloaded when attempting to send commands. 
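processAndSendCommands above uses a collect-and-rethrow pattern for overloaded targets: a CommandTargetOverloadedException does not stop the loop, one representative exception is kept, and it is rethrown only after every excess replica has been attempted, so partial progress still happens (and is counted) while the caller learns that more work remains. A generic sketch of the same pattern with an illustrative command interface (not code from the patch):

import java.io.IOException;
import java.util.List;

public final class CollectAndRethrowSketch {
  private CollectAndRethrowSketch() { }

  interface DeleteCommand {
    void send() throws IOException;
  }

  static int sendAll(List<DeleteCommand> deletes) throws IOException {
    int sent = 0;
    IOException failure = null;
    for (DeleteCommand delete : deletes) {
      try {
        delete.send();
        sent++;
      } catch (IOException e) {
        // Do not abort: remember one failure and keep trying the remaining targets.
        if (failure == null) {
          failure = e;
        }
      }
    }
    if (failure != null) {
      throw failure;  // caller can retry later; sent > 0 still represents useful work
    }
    return sent;
  }
}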
+ if (totalCommandsSent > 0) { + metrics.incrPartialReplicationTotal(); + } + throw firstException; + } + return totalCommandsSent; + } + + private List getSortedReplicas( + Set replicas) { + // sort replicas so that they can be selected in a deterministic way + return replicas.stream() + .sorted(Comparator.comparingLong(ContainerReplica::hashCode)) + .collect(Collectors.toList()); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckReplicaCount.java new file mode 100644 index 00000000000..412978c240e --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckReplicaCount.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; + +/** + * Class to count the replicas in a quasi-closed stuck container. 
+ */ +public class QuasiClosedStuckReplicaCount { + + private final Map> replicasByOrigin = new HashMap<>(); + private final Map> inServiceReplicasByOrigin = new HashMap<>(); + private final Map> maintenanceReplicasByOrigin = new HashMap<>(); + private boolean hasOutOfServiceReplicas = false; + private int minHealthyForMaintenance; + private boolean hasHealthyReplicas = false; + + public QuasiClosedStuckReplicaCount(Set replicas, int minHealthyForMaintenance) { + this.minHealthyForMaintenance = minHealthyForMaintenance; + for (ContainerReplica r : replicas) { + if (r.getState() != StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY) { + hasHealthyReplicas = true; + } + replicasByOrigin.computeIfAbsent(r.getOriginDatanodeId(), k -> new HashSet<>()).add(r); + HddsProtos.NodeOperationalState opState = r.getDatanodeDetails().getPersistedOpState(); + if (opState == HddsProtos.NodeOperationalState.IN_SERVICE) { + inServiceReplicasByOrigin.computeIfAbsent(r.getOriginDatanodeId(), k -> new HashSet<>()).add(r); + } else if (opState == HddsProtos.NodeOperationalState.IN_MAINTENANCE + || opState == HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE) { + maintenanceReplicasByOrigin.computeIfAbsent(r.getOriginDatanodeId(), k -> new HashSet<>()).add(r); + hasOutOfServiceReplicas = true; + } else { + hasOutOfServiceReplicas = true; + } + } + } + + public int availableOrigins() { + return replicasByOrigin.size(); + } + + public boolean hasOutOfServiceReplicas() { + return hasOutOfServiceReplicas; + } + + public boolean hasHealthyReplicas() { + return hasHealthyReplicas; + } + + public boolean isUnderReplicated() { + return !getUnderReplicatedReplicas().isEmpty(); + } + + public List getUnderReplicatedReplicas() { + List misReplicatedOrigins = new ArrayList<>(); + + if (replicasByOrigin.size() == 1) { + Map.Entry> entry = replicasByOrigin.entrySet().iterator().next(); + Set inService = inServiceReplicasByOrigin.get(entry.getKey()); + if (inService == null) { + inService = Collections.emptySet(); + } + Set maintenance = maintenanceReplicasByOrigin.get(entry.getKey()); + int maintenanceCount = maintenance == null ? 0 : maintenance.size(); + + if (maintenanceCount > 0) { + if (inService.size() < minHealthyForMaintenance) { + int additionalReplicas = minHealthyForMaintenance - inService.size(); + misReplicatedOrigins.add(new MisReplicatedOrigin(entry.getValue(), additionalReplicas)); + } + } else { + if (inService.size() < 3) { + int additionalReplicas = 3 - inService.size(); + misReplicatedOrigins.add(new MisReplicatedOrigin(entry.getValue(), additionalReplicas)); + } + } + return misReplicatedOrigins; + } + + // If there are multiple origins, we expect 2 copies of each origin + // For maintenance, we expect 1 copy of each origin and ignore the minHealthyForMaintenance parameter + for (Map.Entry> entry : replicasByOrigin.entrySet()) { + Set inService = inServiceReplicasByOrigin.get(entry.getKey()); + if (inService == null) { + inService = Collections.emptySet(); + } + Set maintenance = maintenanceReplicasByOrigin.get(entry.getKey()); + int maintenanceCount = maintenance == null ? 
0 : maintenance.size(); + + if (inService.size() < 2) { + if (maintenanceCount > 0) { + if (inService.isEmpty()) { + // We need 1 copy online for maintenance + misReplicatedOrigins.add(new MisReplicatedOrigin(entry.getValue(), 1)); + } + } else { + misReplicatedOrigins.add(new MisReplicatedOrigin(entry.getValue(), 2 - inService.size())); + } + } + } + return misReplicatedOrigins; + } + + /** + * Returns True is the container is over-replicated. This means that if we have a single origin, there are more than + * 3 copies. If we have multiple origins, there are more than 2 copies of each origin. + * The over replication check ignore maintenance replicas. The container may become over replicated when maintenance + * ends. + * + * @return True if the container is over-replicated, otherwise false + */ + public boolean isOverReplicated() { + return !getOverReplicatedOrigins().isEmpty(); + } + + public List getOverReplicatedOrigins() { + // If there is only a single origin, we expect 3 copies, otherwise we expect 2 copies of each origin + if (replicasByOrigin.size() == 1) { + UUID origin = replicasByOrigin.keySet().iterator().next(); + Set inService = inServiceReplicasByOrigin.get(origin); + if (inService != null && inService.size() > 3) { + return Collections.singletonList(new MisReplicatedOrigin(inService, inService.size() - 3)); + } + return Collections.emptyList(); + } + + // If there are multiple origins, we expect 2 copies of each origin + List overReplicatedOrigins = new ArrayList<>(); + for (UUID origin : replicasByOrigin.keySet()) { + Set replicas = inServiceReplicasByOrigin.get(origin); + if (replicas != null && replicas.size() > 2) { + overReplicatedOrigins.add(new MisReplicatedOrigin(replicas, replicas.size() - 2)); + } + } + // If we have 2 copies or less of each origin, we are not over-replicated + return overReplicatedOrigins; + } + + /** + * Class to represent the origin of under replicated replicas and the number of additional replicas required. + */ + public static class MisReplicatedOrigin { + + private final Set sources; + private final int replicaDelta; + + public MisReplicatedOrigin(Set sources, int replicaDelta) { + this.sources = sources; + this.replicaDelta = replicaDelta; + } + + public Set getSources() { + return sources; + } + + public int getReplicaDelta() { + return replicaDelta; + } + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckUnderReplicationHandler.java new file mode 100644 index 00000000000..fd442eb1976 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckUnderReplicationHandler.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.pipeline.InsufficientDatanodesException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class to correct under replicated QuasiClosed Stuck Ratis containers. + */ +public class QuasiClosedStuckUnderReplicationHandler implements UnhealthyReplicationHandler { + public static final Logger LOG = LoggerFactory.getLogger(QuasiClosedStuckUnderReplicationHandler.class); + + private final PlacementPolicy placementPolicy; + private final ReplicationManager replicationManager; + private final long currentContainerSize; + private final ReplicationManagerMetrics metrics; + + public QuasiClosedStuckUnderReplicationHandler(final PlacementPolicy placementPolicy, + final ConfigurationSource conf, final ReplicationManager replicationManager) { + this.placementPolicy = placementPolicy; + this.currentContainerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); + this.replicationManager = replicationManager; + this.metrics = replicationManager.getMetrics(); + } + + @Override + public int processAndSendCommands(Set replicas, List pendingOps, + ContainerHealthResult result, int remainingMaintenanceRedundancy) throws IOException { + ContainerInfo containerInfo = result.getContainerInfo(); + LOG.debug("Handling under replicated QuasiClosed Stuck Ratis container {}", containerInfo); + + int pendingAdd = 0; + for (ContainerReplicaOp op : pendingOps) { + if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) { + pendingAdd++; + } + } + + if (pendingAdd > 0) { + LOG.debug("Container {} has pending add operations. No more replication will be scheduled until they complete", + containerInfo); + return 0; + } + + QuasiClosedStuckReplicaCount replicaCount = + new QuasiClosedStuckReplicaCount(replicas, remainingMaintenanceRedundancy); + + List misReplicatedOrigins + = replicaCount.getUnderReplicatedReplicas(); + + if (misReplicatedOrigins.isEmpty()) { + LOG.debug("Container {} is not under replicated", containerInfo); + return 0; + } + + // Schedule Replicas for the under replicated origins. 
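+ // Illustrative walk-through: if origins A and B each have only one in-service replica, both need one new copy; the target chosen for A is immediately recorded as a pending ADD in mutablePendingOps, so the placement call for B treats that node as already holding a pending replica and the two new copies land on different datanodes.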
+ int totalRequiredReplicas = 0; + int totalCommandsSent = 0; + IOException firstException = null; + List mutablePendingOps = new ArrayList<>(pendingOps); + for (QuasiClosedStuckReplicaCount.MisReplicatedOrigin origin : misReplicatedOrigins) { + totalRequiredReplicas += origin.getReplicaDelta(); + List targets; + try { + targets = getTargets(containerInfo, replicas, origin.getReplicaDelta(), mutablePendingOps); + } catch (SCMException e) { + if (firstException == null) { + firstException = e; + } + LOG.warn("Cannot replicate container {} because no suitable targets were found.", containerInfo, e); + continue; + } + + List sourceDatanodes = origin.getSources().stream() + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + for (DatanodeDetails target : targets) { + try { + replicationManager.sendThrottledReplicationCommand(containerInfo, sourceDatanodes, target, 0); + // Add the pending op, so we exclude the node for subsequent origins + mutablePendingOps.add(ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, target, 0)); + totalCommandsSent++; + } catch (CommandTargetOverloadedException e) { + LOG.warn("Cannot replicate container {} because all sources are overloaded.", containerInfo); + if (firstException == null) { + firstException = e; + } + } + } + } + + if (firstException != null || totalCommandsSent < totalRequiredReplicas) { + // Some commands were not sent as expected (not enough nodes found or overloaded nodes), so we just rethrow + // the first exception we encountered. + LOG.info("A command was not sent for all required new replicas for container {}. Total sent {}, required {} ", + containerInfo, totalCommandsSent, totalRequiredReplicas); + metrics.incrPartialReplicationTotal(); + if (firstException != null) { + throw firstException; + } else { + throw new InsufficientDatanodesException(totalRequiredReplicas, totalCommandsSent); + } + } + return totalCommandsSent; + } + + private List getTargets(ContainerInfo containerInfo, + Set replicas, int additionalRequired, List pendingOps) throws IOException { + LOG.debug("Need {} target datanodes for container {}. Current replicas: {}.", + additionalRequired, containerInfo, replicas); + + ReplicationManagerUtil.ExcludedAndUsedNodes excludedAndUsedNodes = + ReplicationManagerUtil.getExcludedAndUsedNodes(containerInfo, new ArrayList<>(replicas), Collections.emptySet(), + pendingOps, replicationManager); + + List excluded = excludedAndUsedNodes.getExcludedNodes(); + List used = excludedAndUsedNodes.getUsedNodes(); + + LOG.debug("UsedList: {}, size {}. ExcludeList: {}, size: {}. 
", + used, used.size(), excluded, excluded.size()); + + return ReplicationManagerUtil.getTargetDatanodes(placementPolicy, + additionalRequired, used, excluded, currentContainerSize, containerInfo); + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 0dee75f559e..09245b2bee7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdds.scm.container.replication.health.MismatchedReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.OpenContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.QuasiClosedContainerHandler; +import org.apache.hadoop.hdds.scm.container.replication.health.QuasiClosedStuckReplicationCheck; import org.apache.hadoop.hdds.scm.container.replication.health.RatisReplicationCheckHandler; import org.apache.hadoop.hdds.scm.container.replication.health.RatisUnhealthyReplicationCheckHandler; import org.apache.hadoop.hdds.scm.container.replication.health.VulnerableUnhealthyReplicasHandler; @@ -182,6 +183,8 @@ public class ReplicationManager implements SCMService, ContainerReplicaPendingOp private final RatisUnderReplicationHandler ratisUnderReplicationHandler; private final RatisOverReplicationHandler ratisOverReplicationHandler; private final RatisMisReplicationHandler ratisMisReplicationHandler; + private final QuasiClosedStuckUnderReplicationHandler quasiClosedStuckUnderReplicationHandler; + private final QuasiClosedStuckOverReplicationHandler quasiClosedStuckOverReplicationHandler; private Thread underReplicatedProcessorThread; private Thread overReplicatedProcessorThread; private final UnderReplicatedProcessor underReplicatedProcessor; @@ -248,6 +251,9 @@ public ReplicationManager(final ConfigurationSource conf, new RatisOverReplicationHandler(ratisContainerPlacement, this); ratisMisReplicationHandler = new RatisMisReplicationHandler( ratisContainerPlacement, conf, this); + quasiClosedStuckUnderReplicationHandler = + new QuasiClosedStuckUnderReplicationHandler(ratisContainerPlacement, conf, this); + quasiClosedStuckOverReplicationHandler = new QuasiClosedStuckOverReplicationHandler(this); underReplicatedProcessor = new UnderReplicatedProcessor(this, rmConf::getUnderReplicatedInterval); overReplicatedProcessor = @@ -262,6 +268,7 @@ public ReplicationManager(final ConfigurationSource conf, .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) + .addNext(new QuasiClosedStuckReplicationCheck()) .addNext(ecReplicationCheckHandler) .addNext(ratisReplicationCheckHandler) .addNext(new ClosedWithUnhealthyReplicasHandler(this)) @@ -746,8 +753,15 @@ int processUnderReplicatedContainer( if (result.getHealthState() == ContainerHealthResult.HealthState.UNDER_REPLICATED) { - handler = isEC ? 
ecUnderReplicationHandler - : ratisUnderReplicationHandler; + if (isEC) { + handler = ecUnderReplicationHandler; + } else { + if (QuasiClosedStuckReplicationCheck.shouldHandleAsQuasiClosedStuck(result.getContainerInfo(), replicas)) { + handler = quasiClosedStuckUnderReplicationHandler; + } else { + handler = ratisUnderReplicationHandler; + } + } } else if (result.getHealthState() == ContainerHealthResult.HealthState.MIS_REPLICATED) { handler = isEC ? ecMisReplicationHandler : ratisMisReplicationHandler; @@ -769,8 +783,16 @@ int processOverReplicatedContainer( containerReplicaPendingOps.getPendingOps(containerID); final boolean isEC = isEC(result.getContainerInfo().getReplicationConfig()); - final UnhealthyReplicationHandler handler = isEC ? ecOverReplicationHandler - : ratisOverReplicationHandler; + UnhealthyReplicationHandler handler; + if (isEC) { + handler = ecOverReplicationHandler; + } else { + if (QuasiClosedStuckReplicationCheck.shouldHandleAsQuasiClosedStuck(result.getContainerInfo(), replicas)) { + handler = quasiClosedStuckOverReplicationHandler; + } else { + handler = ratisOverReplicationHandler; + } + } return handler.processAndSendCommands(replicas, pendingOps, result, getRemainingMaintenanceRedundancy(isEC)); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java index 11b45755a62..bfac61f404f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java @@ -85,6 +85,17 @@ public boolean handle(ContainerCheckRequest request) { return false; } + /** + * Returns true if the container is stuck in QUASI_CLOSED state, otherwise false. + * @param container The container to check + * @param replicas Set of ContainerReplicas + * @return true if the container is stuck in QUASI_CLOSED state, otherwise false + */ + public static boolean isQuasiClosedStuck(final ContainerInfo container, + final Set replicas) { + return !canForceCloseContainer(container, replicas); + } + /** * Returns true if more than 50% of the container replicas with unique * originNodeId are in QUASI_CLOSED state. @@ -93,7 +104,7 @@ public boolean handle(ContainerCheckRequest request) { * @param replicas Set of ContainerReplicas * @return true if we can force close the container, false otherwise */ - private boolean canForceCloseContainer(final ContainerInfo container, + private static boolean canForceCloseContainer(final ContainerInfo container, final Set replicas) { final int replicationFactor = container.getReplicationConfig().getRequiredNodes(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedStuckReplicationCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedStuckReplicationCheck.java new file mode 100644 index 00000000000..7882dfd32eb --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedStuckReplicationCheck.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication.health; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; + +import java.util.Set; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; +import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult; +import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp; +import org.apache.hadoop.hdds.scm.container.replication.QuasiClosedStuckReplicaCount; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class to check for the replication of the replicas in quasi-closed stuck containers. As we want to maintain + * as much data and information as possible, the rule for QC stuck container is to maintain 2 copies of each origin + * if there is more than 1 origin. If there is only 1 origin, then we need to maintain 3 copies. + */ +public class QuasiClosedStuckReplicationCheck extends AbstractCheck { + public static final Logger LOG = LoggerFactory.getLogger(QuasiClosedStuckReplicationCheck.class); + + public static boolean shouldHandleAsQuasiClosedStuck(ContainerInfo containerInfo, Set replicas) { + if (containerInfo.getState() != QUASI_CLOSED) { + return false; + } + if (!QuasiClosedContainerHandler.isQuasiClosedStuck(containerInfo, replicas)) { + return false; + } + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 0); + if (replicaCount.availableOrigins() == 1) { + // This is the 3 copies of a single origin case, so allow it to be handled via the normal under-replicated + // handler. + return false; + } + // If we have all origins with open replicas, and not unhealthy then the container should close after the close + // goes through, so this handler should not run. + return !hasEnoughOriginsWithOpen(containerInfo, replicas); + } + + @Override + public boolean handle(ContainerCheckRequest request) { + if (!shouldHandleAsQuasiClosedStuck(request.getContainerInfo(), request.getContainerReplicas())) { + return false; + } + + if (request.getContainerReplicas().isEmpty()) { + // If there are no replicas, then mark as missing and return. 
+ request.getReport().incrementAndSample( + ReplicationManagerReport.HealthState.MISSING, request.getContainerInfo().containerID()); + return true; + } + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount( + request.getContainerReplicas(), request.getMaintenanceRedundancy()); + + if (!replicaCount.hasHealthyReplicas()) { + // All unhealthy are handled by a different handler + return false; + } + + int pendingAdd = 0; + int pendingDelete = 0; + for (ContainerReplicaOp op : request.getPendingOps()) { + if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) { + pendingAdd++; + } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) { + pendingDelete++; + } + } + + if (replicaCount.isUnderReplicated()) { + LOG.debug("Container {} is quasi-closed-stuck under-replicated", request.getContainerInfo()); + request.getReport().incrementAndSample(ReplicationManagerReport.HealthState.UNDER_REPLICATED, + request.getContainerInfo().containerID()); + if (pendingAdd == 0) { + // Only queue if there are no pending adds, as that could correct the under replication. + LOG.debug("Queueing under-replicated health result for container {}", request.getContainerInfo()); + ContainerHealthResult.UnderReplicatedHealthResult underReplicatedHealthResult = + new ContainerHealthResult.UnderReplicatedHealthResult(request.getContainerInfo(), 1, + replicaCount.hasOutOfServiceReplicas(), false, false); + request.getReplicationQueue().enqueue(underReplicatedHealthResult); + } + return true; + } + + if (replicaCount.isOverReplicated()) { + LOG.debug("Container {} is quasi-closed-stuck over-replicated", request.getContainerInfo()); + request.getReport().incrementAndSample(ReplicationManagerReport.HealthState.OVER_REPLICATED, + request.getContainerInfo().containerID()); + if (pendingDelete == 0) { + // Only queue if there are no pending deletes which could correct the over replication + LOG.debug("Queueing over-replicated health result for container {}", request.getContainerInfo()); + ContainerHealthResult.OverReplicatedHealthResult overReplicatedHealthResult = + new ContainerHealthResult.OverReplicatedHealthResult(request.getContainerInfo(), 1, false); + request.getReplicationQueue().enqueue(overReplicatedHealthResult); + } + return true; + } + return false; + } + + private static boolean hasEnoughOriginsWithOpen(ContainerInfo containerInfo, Set replicas) { + final long uniqueOpenReplicaCount = replicas.stream() + .filter(r -> r.getState() == State.QUASI_CLOSED || r.getState() == State.OPEN) + .map(ContainerReplica::getOriginDatanodeId) + .distinct() + .count(); + final int replicationFactor = containerInfo.getReplicationConfig().getRequiredNodes(); + return uniqueOpenReplicaCount >= replicationFactor; + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java index 3ed7348112a..4ddd99aeb3e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java @@ -79,6 +79,10 @@ public boolean handle(ContainerCheckRequest request) { // This handler is only for Ratis containers. 
return false; } + if (QuasiClosedStuckReplicationCheck + .shouldHandleAsQuasiClosedStuck(request.getContainerInfo(), request.getContainerReplicas())) { + return false; + } ReplicationManagerReport report = request.getReport(); ContainerInfo container = request.getContainerInfo(); ContainerHealthResult health = checkHealth(request); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerAttribute.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerAttribute.java index 4f4ad53ace5..8a7fdc472bd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerAttribute.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerAttribute.java @@ -20,16 +20,18 @@ import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.FAILED_TO_CHANGE_CONTAINER_STATE; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Maps; +import java.util.ArrayList; import java.util.EnumMap; -import java.util.NavigableSet; +import java.util.List; +import java.util.NavigableMap; import java.util.Objects; -import java.util.TreeSet; +import java.util.SortedMap; +import java.util.TreeMap; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.exceptions.SCMException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.ratis.util.Preconditions; /** * Each Attribute that we manage for a container is maintained as a map. @@ -59,11 +61,8 @@ * @param Attribute type */ public class ContainerAttribute> { - private static final Logger LOG = - LoggerFactory.getLogger(ContainerAttribute.class); - private final Class attributeClass; - private final ImmutableMap> attributeMap; + private final ImmutableMap> attributeMap; /** * Create an empty Container Attribute map. @@ -71,25 +70,21 @@ public class ContainerAttribute> { public ContainerAttribute(Class attributeClass) { this.attributeClass = attributeClass; - final EnumMap> map = new EnumMap<>(attributeClass); + final EnumMap> map = new EnumMap<>(attributeClass); for (T t : attributeClass.getEnumConstants()) { - map.put(t, new TreeSet<>()); + map.put(t, new TreeMap<>()); } this.attributeMap = Maps.immutableEnumMap(map); } /** - * Insert the value in the Attribute map, keep the original value if it exists - * already. - * - * @param key - The key to the set where the ContainerID should exist. - * @param value - Actual Container ID. - * @return true if the value is added; - * otherwise, the value already exists, return false. + * Add the given non-existing {@link ContainerInfo} to this attribute. + * @throws IllegalStateException if it already exists. */ - public boolean insert(T key, ContainerID value) { - Objects.requireNonNull(value, "value == null"); - return get(key).add(value); + public void addNonExisting(T key, ContainerInfo info) { + Objects.requireNonNull(info, "value == null"); + final ContainerInfo previous = get(key).put(info.containerID(), info); + Preconditions.assertNull(previous, "previous"); } /** @@ -102,30 +97,30 @@ public void clearSet(T key) { } /** - * Removes a container ID from the set pointed by the key. - * - * @param key - key to identify the set. - * @param value - Container ID + * Remove a container for the given id. 
+ * @return the info if there was a mapping for the id; otherwise, return null */ - public boolean remove(T key, ContainerID value) { - Objects.requireNonNull(value, "value == null"); + public ContainerInfo remove(T key, ContainerID id) { + Objects.requireNonNull(id, "id == null"); + return get(key).remove(id); + } - if (!get(key).remove(value)) { - LOG.debug("Container {} not found in {} attribute", value, key); - return false; - } - return true; + /** Remove an existing {@link ContainerInfo}. */ + public void removeExisting(T key, ContainerInfo existing) { + Objects.requireNonNull(existing, "existing == null"); + final ContainerInfo removed = remove(key, existing.containerID()); + Preconditions.assertSame(existing, removed, "removed"); } - NavigableSet get(T attribute) { + NavigableMap get(T attribute) { Objects.requireNonNull(attribute, "attribute == null"); - final NavigableSet set = attributeMap.get(attribute); - if (set == null) { + final NavigableMap map = attributeMap.get(attribute); + if (map == null) { throw new IllegalStateException("Attribute not found: " + attribute + " (" + attributeClass.getSimpleName() + ")"); } - return set; + return map; } /** @@ -134,8 +129,17 @@ NavigableSet get(T attribute) { * @param key - Key to the bucket. * @return Underlying Set in immutable form. */ - public NavigableSet getCollection(T key) { - return ImmutableSortedSet.copyOf(get(key)); + public List getCollection(T key) { + return new ArrayList<>(get(key).values()); + } + + public SortedMap tailMap(T key, ContainerID start) { + Objects.requireNonNull(start, "start == null"); + return get(key).tailMap(start); + } + + public int count(T key) { + return get(key).size(); } /** @@ -153,17 +157,13 @@ public void update(T currentKey, T newKey, ContainerID value) } Objects.requireNonNull(newKey, "newKey == null"); - final boolean removed = remove(currentKey, value); - if (!removed) { + final ContainerInfo removed = remove(currentKey, value); + if (removed == null) { throw new SCMException("Failed to update Container " + value + " from " + currentKey + " to " + newKey + ": Container " + value + " not found in attribute " + currentKey, FAILED_TO_CHANGE_CONTAINER_STATE); } - final boolean inserted = insert(newKey, value); - if (!inserted) { - LOG.warn("Update Container {} from {} to {}: Container {} already exists in {}", - value, currentKey, newKey, value, newKey); - } + addNonExisting(newKey, removed); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java index 40b7d51c8c7..a01ce7c52f9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java @@ -18,20 +18,23 @@ package org.apache.hadoop.hdds.scm.container.states; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; -import java.util.NavigableSet; +import java.util.NavigableMap; +import java.util.Objects; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; +import java.util.TreeMap; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.protocol.DatanodeID; import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.ratis.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,24 +68,130 @@ * select a container that belongs to user1, with Ratis replication which can * make 3 copies of data. The fact that we will look for open containers by * default and if we cannot find them we will add new containers. - * + *
* <p>
* All the calls are idempotent. + *
* <p>
+ * This class is NOT thread-safe. */ public class ContainerStateMap { private static final Logger LOG = LoggerFactory.getLogger(ContainerStateMap.class); + /** + * Two levels map. + * Outer container map: {@link ContainerID} -> {@link Entry} (info and replicas) + * Inner replica map: {@link DatanodeID} -> {@link ContainerReplica} + */ + private static class ContainerMap { + private static class Entry { + private final ContainerInfo info; + private final Map replicas = new HashMap<>(); + + Entry(ContainerInfo info) { + this.info = info; + } + + ContainerInfo getInfo() { + return info; + } + + Set getReplicas() { + return new HashSet<>(replicas.values()); + } + + ContainerReplica put(ContainerReplica r) { + return replicas.put(r.getDatanodeDetails().getID(), r); + } + + ContainerReplica removeReplica(DatanodeID datanodeID) { + return replicas.remove(datanodeID); + } + } + + private final NavigableMap map = new TreeMap<>(); + + boolean contains(ContainerID id) { + return map.containsKey(id); + } + + ContainerInfo getInfo(ContainerID id) { + final Entry entry = map.get(id); + return entry == null ? null : entry.getInfo(); + } + + List getInfos(ContainerID start, int count) { + Objects.requireNonNull(start, "start == null"); + Preconditions.assertTrue(count >= 0, "count < 0"); + return map.tailMap(start).values().stream() + .map(Entry::getInfo) + .limit(count) + .collect(Collectors.toList()); + } + + Set getReplicas(ContainerID id) { + Objects.requireNonNull(id, "id == null"); + final Entry entry = map.get(id); + return entry == null ? null : entry.getReplicas(); + } + + /** + * Add if the given info not already in this map. + * + * @return true iff the given info is added. + */ + boolean addIfAbsent(ContainerInfo info) { + Objects.requireNonNull(info, "info == null"); + final ContainerID id = info.containerID(); + if (map.containsKey(id)) { + return false; // already exist + } + final Entry previous = map.put(id, new Entry(info)); + Preconditions.assertNull(previous, "previous"); + return true; + } + + ContainerReplica put(ContainerReplica replica) { + Objects.requireNonNull(replica, "replica == null"); + final Entry entry = map.get(replica.getContainerID()); + return entry == null ? null : entry.put(replica); + } + + ContainerInfo remove(ContainerID id) { + Objects.requireNonNull(id, "id == null"); + final Entry removed = map.remove(id); + return removed == null ? null : removed.getInfo(); + } + + ContainerReplica removeReplica(ContainerID containerID, DatanodeID datanodeID) { + Objects.requireNonNull(containerID, "containerID == null"); + Objects.requireNonNull(datanodeID, "datanodeID == null"); + final Entry entry = map.get(containerID); + return entry == null ? null : entry.removeReplica(datanodeID); + } + } + /** + * Map {@link LifeCycleState} to {@link ContainerInfo}. + * Note that a {@link ContainerInfo} can only exists in at most one of the {@link LifeCycleState}s. + */ private final ContainerAttribute lifeCycleStateMap = new ContainerAttribute<>(LifeCycleState.class); + /** + * Map {@link ReplicationType} to {@link ContainerInfo}. + * Note that a {@link ContainerInfo} can only exists in at most one of the {@link ReplicationType}s. + */ private final ContainerAttribute typeMap = new ContainerAttribute<>(ReplicationType.class); - private final Map containerMap; - private final Map> replicaMap; + /** + * Map {@link ContainerID} to ({@link ContainerInfo} and {@link ContainerReplica}). + * Note that the following sets are exactly the same + * 1. The {@link ContainerInfo} in this map. 
+ * 2. The {@link ContainerInfo} in the union of all the states in {@link #lifeCycleStateMap}. + * 2. The {@link ContainerInfo} in the union of all the types in {@link #typeMap}. + */ + private final ContainerMap containerMap = new ContainerMap(); /** * Create a ContainerStateMap. */ public ContainerStateMap() { - this.containerMap = new ConcurrentHashMap<>(); - this.replicaMap = new ConcurrentHashMap<>(); } @VisibleForTesting @@ -94,24 +203,18 @@ public static Logger getLogger() { * Adds a ContainerInfo Entry in the ContainerStateMap. * * @param info - container info - * @throws SCMException - throws if create failed. */ - public void addContainer(final ContainerInfo info) - throws SCMException { - Preconditions.checkNotNull(info, "Container Info cannot be null"); - final ContainerID id = info.containerID(); - if (!contains(id)) { - containerMap.put(id, info); - lifeCycleStateMap.insert(info.getState(), id); - typeMap.insert(info.getReplicationType(), id); - replicaMap.put(id, Collections.emptySet()); - - LOG.trace("Container {} added to ContainerStateMap.", id); + public void addContainer(final ContainerInfo info) { + Objects.requireNonNull(info, "info == null"); + if (containerMap.addIfAbsent(info)) { + lifeCycleStateMap.addNonExisting(info.getState(), info); + typeMap.addNonExisting(info.getReplicationType(), info); + LOG.trace("Added {}", info); } } public boolean contains(final ContainerID id) { - return containerMap.containsKey(id); + return containerMap.contains(id); } /** @@ -120,15 +223,12 @@ public boolean contains(final ContainerID id) { * @param id - ContainerID */ public void removeContainer(final ContainerID id) { - Preconditions.checkNotNull(id, "ContainerID cannot be null"); - if (contains(id)) { - // Should we revert back to the original state if any of the below - // remove operation fails? - final ContainerInfo info = containerMap.remove(id); - lifeCycleStateMap.remove(info.getState(), id); - typeMap.remove(info.getReplicationType(), id); - replicaMap.remove(id); - LOG.trace("Container {} removed from ContainerStateMap.", id); + Objects.requireNonNull(id, "id == null"); + final ContainerInfo info = containerMap.remove(id); + if (info != null) { + lifeCycleStateMap.removeExisting(info.getState(), info); + typeMap.removeExisting(info.getReplicationType(), info); + LOG.trace("Removed {}", info); } } @@ -139,7 +239,7 @@ public void removeContainer(final ContainerID id) { * @return container info, if found else null. */ public ContainerInfo getContainerInfo(final ContainerID containerID) { - return containerMap.get(containerID); + return containerMap.getInfo(containerID); } /** @@ -148,8 +248,8 @@ public ContainerInfo getContainerInfo(final ContainerID containerID) { */ public Set getContainerReplicas( final ContainerID containerID) { - Preconditions.checkNotNull(containerID); - return replicaMap.get(containerID); + Objects.requireNonNull(containerID, "containerID == null"); + return containerMap.getReplicas(containerID); } /** @@ -157,39 +257,18 @@ public Set getContainerReplicas( * Logs a debug entry if a datanode is already added as replica for given * ContainerId. 
*/ - public void updateContainerReplica(final ContainerID containerID, - final ContainerReplica replica) { - Preconditions.checkNotNull(containerID); - if (contains(containerID)) { - final Set newSet = createNewReplicaSet(containerID); - newSet.remove(replica); - newSet.add(replica); - replaceReplicaSet(containerID, newSet); - } + public void updateContainerReplica(ContainerReplica replica) { + Objects.requireNonNull(replica, "replica == null"); + containerMap.put(replica); } /** * Remove a container Replica for given DataNode. */ - public void removeContainerReplica(final ContainerID containerID, - final ContainerReplica replica) { - Preconditions.checkNotNull(containerID); - Preconditions.checkNotNull(replica); - if (contains(containerID)) { - final Set newSet = createNewReplicaSet(containerID); - newSet.remove(replica); - replaceReplicaSet(containerID, newSet); - } - } - - private Set createNewReplicaSet(ContainerID containerID) { - Set existingSet = replicaMap.get(containerID); - return existingSet == null ? new HashSet<>() : new HashSet<>(existingSet); - } - - private void replaceReplicaSet(ContainerID containerID, - Set newSet) { - replicaMap.put(containerID, Collections.unmodifiableSet(newSet)); + public void removeContainerReplica(final ContainerID containerID, DatanodeID datanodeID) { + Objects.requireNonNull(containerID, "containerID == null"); + Objects.requireNonNull(datanodeID, "datanodeID == null"); + containerMap.removeReplica(containerID, datanodeID); } /** @@ -205,7 +284,7 @@ public void updateState(ContainerID containerID, LifeCycleState currentState, if (currentState == newState) { // state not changed return; } - final ContainerInfo currentInfo = containerMap.get(containerID); + final ContainerInfo currentInfo = containerMap.getInfo(containerID); if (currentInfo == null) { // container not found return; } @@ -214,30 +293,34 @@ public void updateState(ContainerID containerID, LifeCycleState currentState, currentInfo.setState(newState); } - public Set getAllContainerIDs() { - return ImmutableSet.copyOf(containerMap.keySet()); + public List getContainerInfos(ContainerID start, int count) { + return containerMap.getInfos(start, count); } /** - * Returns Containers in the System by the Type. * - * @param type - Replication type -- StandAlone, Ratis etc. - * @return NavigableSet + * @param state the state of the {@link ContainerInfo}s + * @param start the start id + * @param count the maximum size of the returned list + * @return a list of {@link ContainerInfo}s sorted by {@link ContainerID} */ - public NavigableSet getContainerIDsByType(final ReplicationType type) { - Preconditions.checkNotNull(type); - return typeMap.getCollection(type); + public List getContainerInfos(LifeCycleState state, ContainerID start, int count) { + Preconditions.assertTrue(count >= 0, "count < 0"); + return lifeCycleStateMap.tailMap(state, start).values().stream() + .limit(count) + .collect(Collectors.toList()); } - /** - * Returns Containers by State. - * - * @param state - State - Open, Closed etc. - * @return List of containers by state. - */ - public NavigableSet getContainerIDsByState( - final LifeCycleState state) { - Preconditions.checkNotNull(state); + public List getContainerInfos(LifeCycleState state) { return lifeCycleStateMap.getCollection(state); } + + public List getContainerInfos(ReplicationType type) { + return typeMap.getCollection(type); + } + + /** @return the number of containers for the given state. 
*/ + public int getContainerCount(LifeCycleState state) { + return lifeCycleStateMap.count(state); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/CommandQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/CommandQueue.java index 568328210c7..f122215105f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/CommandQueue.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/CommandQueue.java @@ -27,7 +27,6 @@ import java.util.UUID; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import org.apache.hadoop.util.Time; /** * Command Queue is queue of commands for the datanode. @@ -52,8 +51,6 @@ public long getCommandsInQueue() { /** * Constructs a Command Queue. - * TODO : Add a flusher thread that throws away commands older than a certain - * time period. */ public CommandQueue() { commandMap = new HashMap<>(); @@ -78,9 +75,9 @@ public void clear() { * @return List of SCM Commands. */ @SuppressWarnings("unchecked") - List getCommand(final UUID datanodeUuid) { + List> getCommand(final UUID datanodeUuid) { Commands cmds = commandMap.remove(datanodeUuid); - List cmdList = null; + List> cmdList = null; if (cmds != null) { cmdList = cmds.getCommands(); commandsInQueue -= !cmdList.isEmpty() ? cmdList.size() : 0; @@ -134,8 +131,7 @@ public Map getDatanodeCommandSummary( * @param datanodeUuid DatanodeDetails.Uuid * @param command - Command */ - public void addCommand(final UUID datanodeUuid, final SCMCommand - command) { + public void addCommand(final UUID datanodeUuid, final SCMCommand command) { commandMap.computeIfAbsent(datanodeUuid, s -> new Commands()).add(command); commandsInQueue++; } @@ -144,39 +140,20 @@ public void addCommand(final UUID datanodeUuid, final SCMCommand * Class that stores commands for a datanode. */ private static class Commands { - private long updateTime = 0; - private long readTime = 0; - private List commands = new ArrayList<>(); + private List> commands = new ArrayList<>(); private final Map summary = new HashMap<>(); - /** - * Gets the last time the commands for this node was updated. - * @return Time stamp - */ - public long getUpdateTime() { - return updateTime; - } - - /** - * Gets the last read time. - * @return last time when these commands were read from this queue. - */ - public long getReadTime() { - return readTime; - } - /** * Adds a command to the list. * * @param command SCMCommand */ - public void add(SCMCommand command) { + public void add(SCMCommand command) { this.commands.add(command); if (command.contributesToQueueSize()) { summary.put(command.getType(), summary.getOrDefault(command.getType(), 0) + 1); } - updateTime = Time.monotonicNow(); } public int getCommandSummary(SCMCommandProto.Type commandType) { @@ -191,11 +168,10 @@ public Map getAllCommandsSummary() { * Returns the commands for this datanode. * @return command list. 
*/ - public List getCommands() { - List temp = this.commands; + public List> getCommands() { + List> temp = this.commands; this.commands = new ArrayList<>(); summary.clear(); - readTime = Time.monotonicNow(); return temp; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java index 20dc5aea786..f582623b8c1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java @@ -98,7 +98,7 @@ public void onMessage(final DatanodeDetails datanodeDetails, } // remove commands in command queue for the DN - final List cmdList = nodeManager.getCommandQueue( + final List> cmdList = nodeManager.getCommandQueue( datanodeDetails.getUuid()); LOG.info("Clearing command queue of size {} for DN {}", cmdList.size(), datanodeDetails); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index e6a74b395f7..275665ec38c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -282,7 +282,7 @@ Set getContainers(DatanodeDetails datanodeDetails) * @param dnId datanode uuid * @param command */ - void addDatanodeCommand(UUID dnId, SCMCommand command); + void addDatanodeCommand(UUID dnId, SCMCommand command); /** @@ -368,7 +368,7 @@ Map getTotalDatanodeCommandCounts( * @return list of commands */ // TODO: We can give better name to this method! - List getCommandQueue(UUID dnID); + List> getCommandQueue(UUID dnID); /** * Given datanode uuid, returns the DatanodeDetails for the node. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 43d13e4ae6d..ee6ad2b3380 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -533,7 +533,7 @@ private boolean isVersionChange(String oldVersion, String newVersion) { * @return SCMheartbeat response. 
*/ @Override - public List processHeartbeat(DatanodeDetails datanodeDetails, + public List> processHeartbeat(DatanodeDetails datanodeDetails, CommandQueueReportProto queueReport) { Preconditions.checkNotNull(datanodeDetails, "Heartbeat is missing " + "DatanodeDetails."); @@ -550,7 +550,7 @@ public List processHeartbeat(DatanodeDetails datanodeDetails, try { Map summary = commandQueue.getDatanodeCommandSummary(datanodeDetails.getUuid()); - List commands = + List> commands = commandQueue.getCommand(datanodeDetails.getUuid()); // Update the SCMCommand of deleteBlocksCommand Status @@ -1635,7 +1635,7 @@ public int getPipeLineCount(DatanodeDetails datanodeDetails) } @Override - public void addDatanodeCommand(UUID dnId, SCMCommand command) { + public void addDatanodeCommand(UUID dnId, SCMCommand command) { writeLock().lock(); try { this.commandQueue.addCommand(dnId, command); @@ -1678,7 +1678,7 @@ public void onMessage(CommandForDatanode commandForDatanode, } @Override - public List getCommandQueue(UUID dnID) { + public List> getCommandQueue(UUID dnID) { // Getting the queue actually clears it and returns the commands, so this // is a write operation and not a read as the method name suggests. writeLock().lock(); @@ -1846,7 +1846,7 @@ public void removeNode(DatanodeDetails datanodeDetails) throws NodeNotFoundExcep } nodeStateManager.removeNode(datanodeDetails); removeFromDnsToUuidMap(datanodeDetails.getUuid(), datanodeDetails.getIpAddress()); - final List cmdList = getCommandQueue(datanodeDetails.getUuid()); + final List> cmdList = getCommandQueue(datanodeDetails.getUuid()); LOG.info("Clearing command queue of size {} for DN {}", cmdList.size(), datanodeDetails); } else { LOG.warn("Node not decommissioned or dead, cannot remove: {}", datanodeDetails); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java index ddc87da038e..58d2a8164ea 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java @@ -82,10 +82,10 @@ public SCMDatanodeHeartbeatDispatcher(NodeManager nodeManager, * * @return list of SCMCommand */ - public List dispatch(SCMHeartbeatRequestProto heartbeat) { + public List> dispatch(SCMHeartbeatRequestProto heartbeat) { DatanodeDetails datanodeDetails = DatanodeDetails.getFromProtoBuf(heartbeat.getDatanodeDetails()); - List commands; + List> commands; // If node is not registered, ask the node to re-register. Do not process // Heartbeat for unregistered nodes. 
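The heartbeat and command-queue changes above replace raw SCMCommand references with the wildcard form SCMCommand<?>, so a single list can carry commands with different protobuf payload types without falling back to raw types. A minimal, self-contained sketch of the idea follows; Cmd, ReplicatePayload and DeletePayload are hypothetical stand-ins for SCMCommand and its payloads, not classes from this patch.

import java.util.ArrayList;
import java.util.List;

public final class WildcardCommandListSketch {

  // Hypothetical stand-in for SCMCommand<T>, which is parameterized by its proto payload type.
  abstract static class Cmd<T> {
    abstract T getProto();
  }

  static final class ReplicatePayload { }

  static final class DeletePayload { }

  public static void main(String[] args) {
    // A raw List<Cmd> would compile but discard the payload type entirely;
    // List<Cmd<?>> keeps generics intact while still mixing command kinds,
    // which is the pattern the patch applies to the SCM command queue.
    List<Cmd<?>> queue = new ArrayList<>();
    queue.add(new Cmd<ReplicatePayload>() {
      @Override
      ReplicatePayload getProto() {
        return new ReplicatePayload();
      }
    });
    queue.add(new Cmd<DeletePayload>() {
      @Override
      DeletePayload getProto() {
        return new DeletePayload();
      }
    });
    System.out.println("Commands queued: " + queue.size());
  }
}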
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java index 4a549afa773..50b6d25f079 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java @@ -302,7 +302,7 @@ private String constructCommandAuditMap(List cmds) { public SCMHeartbeatResponseProto sendHeartbeat( SCMHeartbeatRequestProto heartbeat) throws IOException, TimeoutException { List cmdResponses = new ArrayList<>(); - for (SCMCommand cmd : heartbeatDispatcher.dispatch(heartbeat)) { + for (SCMCommand cmd : heartbeatDispatcher.dispatch(heartbeat)) { cmdResponses.add(getCommandResponse(cmd, scm)); } final OptionalLong term = getTermIfLeader(); @@ -352,7 +352,7 @@ private OptionalLong getTermIfLeader() { * @throws IOException */ @VisibleForTesting - public static SCMCommandProto getCommandResponse(SCMCommand cmd, + public static SCMCommandProto getCommandResponse(SCMCommand cmd, OzoneStorageContainerManager scm) throws IOException, TimeoutException { SCMCommandProto.Builder builder = SCMCommandProto.newBuilder() .setEncodedToken(cmd.getEncodedToken()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java index dd297cb35ec..8b9dfe873e8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java @@ -132,7 +132,7 @@ public void testReplicasToFixMisreplicationWithOneMisreplication() { List replicaDns = Stream.of(0, 1, 2, 3, 5) .map(list::get).collect(Collectors.toList()); List replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 1, ImmutableMap.of(racks.get(0), 1)); @@ -153,7 +153,7 @@ public void testReplicasToFixMisreplicationWithTwoMisreplication() { List replicaDns = Stream.of(0, 1, 2, 3, 5) .map(list::get).collect(Collectors.toList()); List replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, ImmutableMap.of(racks.get(0), 2)); @@ -174,7 +174,7 @@ public void testReplicasToFixMisreplicationWithThreeMisreplication() { List replicaDns = Stream.of(0, 1, 2, 3, 5) .map(list::get).collect(Collectors.toList()); List replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 3, ImmutableMap.of(racks.get(0), 3)); @@ -197,7 +197,7 @@ public void testReplicasToFixMisreplicationWithThreeMisreplication() { .map(list::get).collect(Collectors.toList()); //Creating Replicas without replica Index List replicas = HddsTestUtils - .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + .getReplicas(ContainerID.valueOf(1), CLOSED, 0, replicaDns); 
testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 3, ImmutableMap.of(racks.get(0), 2, racks.get(3), 1)); } @@ -220,7 +220,7 @@ public void testReplicasToFixMisreplicationWithThreeMisreplication() { .map(list::get).collect(Collectors.toList()); //Creating Replicas without replica Index for replicas < number of racks List replicas = HddsTestUtils - .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + .getReplicas(ContainerID.valueOf(1), CLOSED, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, ImmutableMap.of(racks.get(0), 1, racks.get(3), 1)); } @@ -243,7 +243,7 @@ public void testReplicasToFixMisreplicationWithThreeMisreplication() { .map(list::get).collect(Collectors.toList()); //Creating Replicas without replica Index for replicas >number of racks List replicas = HddsTestUtils - .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + .getReplicas(ContainerID.valueOf(1), CLOSED, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, ImmutableMap.of(racks.get(0), 1, racks.get(3), 1)); } @@ -257,7 +257,7 @@ public void testReplicasToFixMisreplicationMaxReplicaPerRack() { List replicaDns = Stream.of(0, 2, 4, 6, 8) .map(list::get).collect(Collectors.toList()); List replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns); testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, ImmutableMap.of(racks.get(0), 2)); @@ -273,7 +273,7 @@ public void testReplicasToFixMisreplicationMaxReplicaPerRack() { List replicaDns = Stream.of(0, 2, 4, 6, 8) .map(list::get).collect(Collectors.toList()); List replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns); Map replicaMap = replicas.stream().distinct() .collect(Collectors.toMap(Function.identity(), r -> false)); @@ -292,7 +292,7 @@ public void testReplicasWithoutMisreplication() { List replicaDns = Stream.of(0, 1, 2, 3, 4) .map(list::get).collect(Collectors.toList()); Map replicas = - HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + HddsTestUtils.getReplicasWithReplicaIndex(ContainerID.valueOf(1), CLOSED, 0, 0, 0, replicaDns) .stream() .collect(Collectors.toMap(Function.identity(), r -> true)); @@ -308,9 +308,9 @@ public void testReplicasToRemoveWithOneOverreplication() { List list = nodeManager.getAllNodes(); Set replicas = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(1, 6))); ContainerReplica replica = ContainerReplica.newBuilder() - .setContainerID(new ContainerID(1)) + .setContainerID(ContainerID.valueOf(1)) .setContainerState(CLOSED) .setReplicaIndex(1) .setDatanodeDetails(list.get(7)).build(); @@ -330,11 +330,11 @@ public void testReplicasToRemoveWithTwoOverreplication() { Set replicas = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(1, 6))); Set replicasToBeRemoved = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(7, 9))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(7, 9))); replicas.addAll(replicasToBeRemoved); Set replicasToRemove = dummyPlacementPolicy @@ -351,14 +351,14 @@ public 
void testReplicasToRemoveWith2CountPerUniqueReplica() { Set replicas = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(0, 3))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(0, 3))); replicas.addAll(HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(3, 6))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(3, 6))); Set replicasToBeRemoved = Sets.newHashSet( - HddsTestUtils.getReplicaBuilder(new ContainerID(1), CLOSED, 0, 0, 0, + HddsTestUtils.getReplicaBuilder(ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.get(7).getUuid(), list.get(7)) .setReplicaIndex(1).build(), - HddsTestUtils.getReplicaBuilder(new ContainerID(1), CLOSED, 0, 0, 0, + HddsTestUtils.getReplicaBuilder(ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.get(8).getUuid(), list.get(8)).setReplicaIndex(1) .build()); replicas.addAll(replicasToBeRemoved); @@ -376,7 +376,7 @@ public void testReplicasToRemoveWithoutReplicaIndex() { List list = nodeManager.getAllNodes(); Set replicas = Sets.newHashSet(HddsTestUtils.getReplicas( - new ContainerID(1), CLOSED, 0, list.subList(0, 5))); + ContainerID.valueOf(1), CLOSED, 0, list.subList(0, 5))); Set replicasToRemove = dummyPlacementPolicy .replicasToRemoveToFixOverreplication(replicas, 3); @@ -397,16 +397,16 @@ public void testReplicasToRemoveWithOverreplicationWithinSameRack() { Set replicas = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(1, 6))); ContainerReplica replica1 = ContainerReplica.newBuilder() - .setContainerID(new ContainerID(1)) + .setContainerID(ContainerID.valueOf(1)) .setContainerState(CLOSED) .setReplicaIndex(1) .setDatanodeDetails(list.get(6)).build(); replicas.add(replica1); ContainerReplica replica2 = ContainerReplica.newBuilder() - .setContainerID(new ContainerID(1)) + .setContainerID(ContainerID.valueOf(1)) .setContainerState(CLOSED) .setReplicaIndex(1) .setDatanodeDetails(list.get(0)).build(); @@ -435,7 +435,7 @@ public void testReplicasToRemoveWithNoOverreplication() { List list = nodeManager.getAllNodes(); Set replicas = Sets.newHashSet( HddsTestUtils.getReplicasWithReplicaIndex( - new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + ContainerID.valueOf(1), CLOSED, 0, 0, 0, list.subList(1, 6))); Set replicasToRemove = dummyPlacementPolicy .replicasToRemoveToFixOverreplication(replicas, 1); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index 244f86e7954..a0d39b0f01c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -102,7 +102,7 @@ public class MockNodeManager implements NodeManager { private final List deadNodes; private final Map nodeMetricMap; private final SCMNodeStat aggregateStat; - private final Map> commandMap; + private final Map>> commandMap; private Node2PipelineMap node2PipelineMap; private final Node2ContainerMap node2ContainerMap; private NetworkTopology clusterMap; @@ -533,13 +533,13 @@ public void removeContainer(DatanodeDetails dd, } @Override - public void addDatanodeCommand(UUID dnId, SCMCommand command) { + public void addDatanodeCommand(UUID dnId, SCMCommand command) { if 
(commandMap.containsKey(dnId)) { - List commandList = commandMap.get(dnId); + List> commandList = commandMap.get(dnId); Preconditions.checkNotNull(commandList); commandList.add(command); } else { - List commandList = new LinkedList<>(); + List> commandList = new LinkedList<>(); commandList.add(command); commandMap.put(dnId, commandList); } @@ -656,7 +656,7 @@ public Set getContainers(DatanodeDetails uuid) { // Returns the number of commands that is queued to this node manager. public int getCommandCount(DatanodeDetails dd) { - List list = commandMap.get(dd.getUuid()); + List> list = commandMap.get(dd.getUuid()); return (list == null) ? 0 : list.size(); } @@ -760,7 +760,7 @@ private synchronized void addEntryTodnsToUuidMap( * @return SCMheartbeat response list */ @Override - public List processHeartbeat(DatanodeDetails datanodeDetails, + public List> processHeartbeat(DatanodeDetails datanodeDetails, CommandQueueReportProto commandQueueReportProto) { return null; } @@ -847,7 +847,7 @@ public void onMessage(CommandForDatanode commandForDatanode, } @Override - public List getCommandQueue(UUID dnID) { + public List> getCommandQueue(UUID dnID) { return null; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java index ea1054784d0..085a2824440 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java @@ -269,7 +269,7 @@ public void removeContainer(DatanodeDetails datanodeDetails, } @Override - public void addDatanodeCommand(UUID dnId, SCMCommand command) { + public void addDatanodeCommand(UUID dnId, SCMCommand command) { } /** @@ -341,7 +341,7 @@ public Map getTotalDatanodeCommandCounts( } @Override - public List getCommandQueue(UUID dnID) { + public List> getCommandQueue(UUID dnID) { return null; } @@ -426,7 +426,7 @@ public RegisteredCommand register(DatanodeDetails datanodeDetails, } @Override - public List processHeartbeat(DatanodeDetails datanodeDetails, + public List> processHeartbeat(DatanodeDetails datanodeDetails, CommandQueueReportProto commandQueueReportProto) { return null; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java index 9f382dd70ec..5baa26ba26a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java @@ -142,7 +142,6 @@ void setup() throws IOException, InvalidStateTransitionException { doAnswer(invocation -> { containerStateManager.updateContainerReplica( - ((ContainerID)invocation.getArguments()[0]), (ContainerReplica) invocation.getArguments()[1]); return null; }).when(containerManager).updateContainerReplica( @@ -150,7 +149,6 @@ void setup() throws IOException, InvalidStateTransitionException { doAnswer(invocation -> { containerStateManager.removeContainerReplica( - ((ContainerID)invocation.getArguments()[0]), (ContainerReplica) invocation.getArguments()[1]); return null; }).when(containerManager).removeContainerReplica( @@ -265,8 +263,7 @@ public void testECReplicaIndexValidation() throws 
NodeNotFoundException, Map replicaMap = replicas.stream() .collect(Collectors.toMap(ContainerReplica::getDatanodeDetails, ContainerReplica::getReplicaIndex)); - replicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + replicas.forEach(containerStateManager::updateContainerReplica); testReplicaIndexUpdate(container, datanodeOne, 0, replicaMap); testReplicaIndexUpdate(container, datanodeOne, 6, replicaMap); replicaMap.put(datanodeOne, 2); @@ -300,14 +297,12 @@ public void testUnderReplicatedContainer() getReplicas(containerOne.containerID(), ContainerReplicaProto.State.CLOSED, datanodeOne, datanodeTwo, datanodeThree) - .forEach(r -> containerStateManager.updateContainerReplica( - containerOne.containerID(), r)); + .forEach(containerStateManager::updateContainerReplica); getReplicas(containerTwo.containerID(), ContainerReplicaProto.State.CLOSED, datanodeOne, datanodeTwo, datanodeThree) - .forEach(r -> containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); + .forEach(containerStateManager::updateContainerReplica); // SCM expects both containerOne and containerTwo to be in all the three @@ -359,14 +354,12 @@ public void testOverReplicatedContainer() throws NodeNotFoundException, getReplicas(containerOne.containerID(), ContainerReplicaProto.State.CLOSED, datanodeOne, datanodeTwo, datanodeThree) - .forEach(r -> containerStateManager.updateContainerReplica( - containerOne.containerID(), r)); + .forEach(containerStateManager::updateContainerReplica); getReplicas(containerTwo.containerID(), ContainerReplicaProto.State.CLOSED, datanodeOne, datanodeTwo, datanodeThree) - .forEach(r -> containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); + .forEach(containerStateManager::updateContainerReplica); // SCM expects both containerOne and containerTwo to be in all the three @@ -438,14 +431,8 @@ public void testClosingToClosed() throws NodeNotFoundException, IOException, containerStateManager.addContainer(containerOne.getProtobuf()); containerStateManager.addContainer(containerTwo.getProtobuf()); - containerOneReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - - containerTwoReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - + containerOneReplicas.forEach(containerStateManager::updateContainerReplica); + containerTwoReplicas.forEach(containerStateManager::updateContainerReplica); final ContainerReportsProto containerReport = getContainerReportsProto( containerOne.containerID(), ContainerReplicaProto.State.CLOSED, @@ -655,7 +642,7 @@ private List setupECContainerForTesting( container.getSequenceId(), dns.toArray(new DatanodeDetails[0])); for (ContainerReplica r : replicas) { - containerStateManager.updateContainerReplica(container.containerID(), r); + containerStateManager.updateContainerReplica(r); } // Tell NodeManager that each DN hosts a replica of this container @@ -725,14 +712,8 @@ public void testClosingToQuasiClosed() containerStateManager.addContainer(containerOne.getProtobuf()); containerStateManager.addContainer(containerTwo.getProtobuf()); - containerOneReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - - containerTwoReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - + containerOneReplicas.forEach(containerStateManager::updateContainerReplica); + 
containerTwoReplicas.forEach(containerStateManager::updateContainerReplica); final ContainerReportsProto containerReport = getContainerReportsProto( containerOne.containerID(), ContainerReplicaProto.State.QUASI_CLOSED, @@ -795,14 +776,8 @@ public void testQuasiClosedToClosed() containerStateManager.addContainer(containerOne.getProtobuf()); containerStateManager.addContainer(containerTwo.getProtobuf()); - containerOneReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - - containerTwoReplicas.forEach(r -> - containerStateManager.updateContainerReplica( - containerTwo.containerID(), r)); - + containerOneReplicas.forEach(containerStateManager::updateContainerReplica); + containerTwoReplicas.forEach(containerStateManager::updateContainerReplica); final ContainerReportsProto containerReport = getContainerReportsProto( containerOne.containerID(), ContainerReplicaProto.State.CLOSED, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java index e48c8f1316d..00f8ff83586 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java @@ -205,8 +205,7 @@ private void addReplica(ContainerInfo cont, DatanodeDetails node) { .setContainerState(ContainerReplicaProto.State.CLOSED) .setDatanodeDetails(node) .build(); - containerStateManager - .updateContainerReplica(cont.containerID(), replica); + containerStateManager.updateContainerReplica(replica); } private ContainerInfo allocateContainer() diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java index fa92fa1f774..502c668cb15 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java @@ -155,8 +155,7 @@ public void setup() throws IOException, InvalidStateTransitionException, doAnswer(invocation -> { containerStateManager - .removeContainerReplica(((ContainerID)invocation - .getArguments()[0]), + .removeContainerReplica( (ContainerReplica)invocation.getArguments()[1]); return null; }).when(containerManager).removeContainerReplica( @@ -175,8 +174,7 @@ public void setup() throws IOException, InvalidStateTransitionException, doAnswer(invocation -> { containerStateManager - .updateContainerReplica(((ContainerID)invocation - .getArguments()[0]), + .updateContainerReplica( (ContainerReplica) invocation.getArguments()[1]); return null; }).when(containerManager).updateContainerReplica( @@ -213,8 +211,7 @@ public void testClosingToClosed() throws IOException, TimeoutException { datanodeOne, datanodeTwo, datanodeThree); containerStateManager.addContainer(container.getProtobuf()); - containerReplicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + containerReplicas.forEach(containerStateManager::updateContainerReplica); final IncrementalContainerReportProto containerReport = getIncrementalContainerReportProto(container.containerID(), @@ -331,7 +328,7 @@ private 
List setupECContainerForTesting( container.getSequenceId(), dns.toArray(new DatanodeDetails[0])); for (ContainerReplica r : replicas) { - containerStateManager.updateContainerReplica(container.containerID(), r); + containerStateManager.updateContainerReplica(r); } // Tell NodeManager that each DN hosts a replica of this container @@ -359,9 +356,7 @@ public void testClosingToQuasiClosed() throws IOException { datanodeOne, datanodeTwo, datanodeThree); containerStateManager.addContainer(container.getProtobuf()); - containerReplicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); - + containerReplicas.forEach(containerStateManager::updateContainerReplica); final IncrementalContainerReportProto containerReport = getIncrementalContainerReportProto(container.containerID(), @@ -396,8 +391,7 @@ public void testQuasiClosedToClosed() throws IOException { datanodeThree)); containerStateManager.addContainer(container.getProtobuf()); - containerReplicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + containerReplicas.forEach(containerStateManager::updateContainerReplica); final IncrementalContainerReportProto containerReport = getIncrementalContainerReportProto(container.containerID(), @@ -440,8 +434,7 @@ public void testContainerStateTransitionToClosedWithMismatchingBCSID(LifeCycleSt datanodeThree)); containerStateManager.addContainer(container.getProtobuf()); - containerReplicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + containerReplicas.forEach(containerStateManager::updateContainerReplica); // Generate incremental container report with replica in CLOSED state with intentional lower bcsId final IncrementalContainerReportProto containerReport = @@ -489,8 +482,7 @@ public void testOpenWithUnhealthyReplica() throws IOException { datanodeOne, datanodeTwo, datanodeThree); containerStateManager.addContainer(container.getProtobuf()); - containerReplicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + containerReplicas.forEach(containerStateManager::updateContainerReplica); final IncrementalContainerReportProto containerReport = getIncrementalContainerReportProto(container.containerID(), @@ -523,7 +515,7 @@ public void testDeleteContainer() throws IOException, TimeoutException, containerStateManager.addContainer(container.getProtobuf()); containerReplicas.forEach(r -> { - containerStateManager.updateContainerReplica(container.containerID(), r); + containerStateManager.updateContainerReplica(r); assertDoesNotThrow(() -> nodeManager.addContainer(r.getDatanodeDetails(), container.containerID()), "Node should be found"); @@ -727,8 +719,7 @@ public void testECReplicaIndexValidation() throws NodeNotFoundException, Map replicaMap = replicas.stream() .collect(Collectors.toMap(ContainerReplica::getDatanodeDetails, ContainerReplica::getReplicaIndex)); - replicas.forEach(r -> containerStateManager.updateContainerReplica( - container.containerID(), r)); + replicas.forEach(containerStateManager::updateContainerReplica); testReplicaIndexUpdate(container, dns.get(0), 0, replicaMap); testReplicaIndexUpdate(container, dns.get(0), 6, replicaMap); replicaMap.put(dns.get(0), 2); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java index d3ee327bc43..444affca7db 
100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java @@ -144,6 +144,18 @@ public static ContainerReplica createEmptyContainerReplica(ContainerID container datanodeDetails, datanodeDetails.getUuid()); } + public static Set createReplicasWithOriginAndOpState( + ContainerID containerID, ContainerReplicaProto.State replicaState, + Pair... nodes) { + Set replicas = new HashSet<>(); + for (Pair i : nodes) { + replicas.add(createContainerReplica( + containerID, 0, i.getRight(), replicaState, 123L, 1234L, + MockDatanodeDetails.randomDatanodeDetails(), i.getLeft())); + } + return replicas; + } + public static ContainerReplica createContainerReplica(ContainerID containerID, int replicaIndex, HddsProtos.NodeOperationalState opState, ContainerReplicaProto.State replicaState) { @@ -492,9 +504,29 @@ public static void mockRMSendDeleteCommand(ReplicationManager mock, * @param commandsSent Set to add the command to rather than sending it. */ public static void mockRMSendThrottledDeleteCommand(ReplicationManager mock, - Set>> commandsSent) + Set>> commandsSent) + throws NotLeaderException, CommandTargetOverloadedException { + mockRMSendThrottledDeleteCommand(mock, commandsSent, new AtomicBoolean(false)); + } + + /** + * Given a Mockito mock of ReplicationManager, this method will mock the + * sendThrottledDeleteCommand method so that it adds the command created to + * the commandsSent set. + * @param mock Mock of ReplicationManager + * @param commandsSent Set to add the command to rather than sending it. + * @param throwOverloaded If the atomic boolean is true, throw a + * CommandTargetOverloadedException and set the boolean + * to false, instead of creating the replicate command. 
+ */ + public static void mockRMSendThrottledDeleteCommand(ReplicationManager mock, + Set>> commandsSent, AtomicBoolean throwOverloaded) throws NotLeaderException, CommandTargetOverloadedException { doAnswer((Answer) invocationOnMock -> { + if (throwOverloaded.get()) { + throwOverloaded.set(false); + throw new CommandTargetOverloadedException("Overloaded"); + } ContainerInfo containerInfo = invocationOnMock.getArgument(0); int replicaIndex = invocationOnMock.getArgument(1); DatanodeDetails target = invocationOnMock.getArgument(2); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java index 95209b68d81..0f546a4e3ec 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java @@ -93,14 +93,14 @@ void cleanup() { @Test public void testGetPendingOpsReturnsEmptyList() { List ops = - pendingOps.getPendingOps(new ContainerID(1)); + pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(0, ops.size()); } @Test public void testClear() { - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 0, addCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn1, 0, deleteCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 0, addCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn1, 0, deleteCmd, deadline); assertEquals(1, pendingOps.getPendingOpCount(ContainerReplicaOp.PendingOpType.ADD)); assertEquals(1, pendingOps.getPendingOpCount(ContainerReplicaOp.PendingOpType.DELETE)); @@ -109,26 +109,26 @@ public void testClear() { assertEquals(0, pendingOps.getPendingOpCount(ContainerReplicaOp.PendingOpType.ADD)); assertEquals(0, pendingOps.getPendingOpCount(ContainerReplicaOp.PendingOpType.DELETE)); - assertEquals(0, pendingOps.getPendingOps(new ContainerID(1)).size()); - assertEquals(0, pendingOps.getPendingOps(new ContainerID(2)).size()); + assertEquals(0, pendingOps.getPendingOps(ContainerID.valueOf(1)).size()); + assertEquals(0, pendingOps.getPendingOps(ContainerID.valueOf(2)).size()); } @Test public void testCanAddReplicasForAdd() { - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 0, addCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn2, 0, addCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn3, 0, addCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 0, addCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn2, 0, addCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn3, 0, addCmd, deadline); // Duplicate for DN2 - pendingOps.scheduleAddReplica(new ContainerID(1), dn2, 0, addCmd, deadline + 1); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn2, 0, addCmd, deadline + 1); // Not a duplicate for DN2 as different index. Should not happen in practice as it is not valid to have 2 indexes // on the same node. 
- pendingOps.scheduleAddReplica(new ContainerID(1), dn2, 1, addCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(2), dn1, 1, addCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(2), dn1, 1, addCmd, deadline + 1); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn2, 1, addCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(2), dn1, 1, addCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(2), dn1, 1, addCmd, deadline + 1); List ops = - pendingOps.getPendingOps(new ContainerID(1)); + pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(4, ops.size()); for (ContainerReplicaOp op : ops) { if (!op.getTarget().equals(dn2)) { @@ -147,7 +147,7 @@ public void testCanAddReplicasForAdd() { assertThat(allDns).contains(dn2); assertThat(allDns).contains(dn3); - ops = pendingOps.getPendingOps(new ContainerID(2)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(2)); assertEquals(1, ops.size()); assertEquals(1, ops.get(0).getReplicaIndex()); assertEquals(ADD, ops.get(0).getOpType()); @@ -157,13 +157,13 @@ public void testCanAddReplicasForAdd() { @Test public void testCanAddReplicasForDelete() { - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn1, 0, deleteCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn2, 0, deleteCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn3, 0, deleteCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn1, 1, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn1, 0, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn2, 0, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn3, 0, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn1, 1, deleteCmd, deadline); List ops = - pendingOps.getPendingOps(new ContainerID(1)); + pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(3, ops.size()); for (ContainerReplicaOp op : ops) { assertEquals(0, op.getReplicaIndex()); @@ -175,7 +175,7 @@ public void testCanAddReplicasForDelete() { assertThat(allDns).contains(dn2); assertThat(allDns).contains(dn3); - ops = pendingOps.getPendingOps(new ContainerID(2)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(2)); assertEquals(1, ops.size()); assertEquals(1, ops.get(0).getReplicaIndex()); assertEquals(DELETE, ops.get(0).getOpType()); @@ -184,46 +184,46 @@ public void testCanAddReplicasForDelete() { @Test public void testCompletingOps() { - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn1, 0, deleteCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 0, addCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn2, 0, deleteCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn3, 0, addCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn1, 1, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn1, 0, deleteCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 0, addCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn2, 0, deleteCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn3, 0, addCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn1, 1, deleteCmd, deadline); List ops = - pendingOps.getPendingOps(new ContainerID(1)); + pendingOps.getPendingOps(ContainerID.valueOf(1)); // We expect 4 entries - 2 add and 2 
delete. assertEquals(4, ops.size()); assertTrue(pendingOps - .completeAddReplica(new ContainerID(1), dn1, 0)); - ops = pendingOps.getPendingOps(new ContainerID(1)); + .completeAddReplica(ContainerID.valueOf(1), dn1, 0)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(3, ops.size()); // Complete one that does not exist: assertFalse(pendingOps - .completeAddReplica(new ContainerID(1), dn1, 0)); - ops = pendingOps.getPendingOps(new ContainerID(1)); + .completeAddReplica(ContainerID.valueOf(1), dn1, 0)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(3, ops.size()); // Complete the remaining ones - pendingOps.completeDeleteReplica(new ContainerID(1), dn1, 0); - pendingOps.completeDeleteReplica(new ContainerID(1), dn2, 0); - pendingOps.completeAddReplica(new ContainerID(1), dn3, 0); - ops = pendingOps.getPendingOps(new ContainerID(1)); + pendingOps.completeDeleteReplica(ContainerID.valueOf(1), dn1, 0); + pendingOps.completeDeleteReplica(ContainerID.valueOf(1), dn2, 0); + pendingOps.completeAddReplica(ContainerID.valueOf(1), dn3, 0); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(0, ops.size()); } @Test public void testRemoveSpecificOp() { - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn1, 0, deleteCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 0, addCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn2, 0, deleteCmd, deadline); - pendingOps.scheduleAddReplica(new ContainerID(1), dn3, 0, addCmd, deadline); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn1, 1, deleteCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn1, 0, deleteCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 0, addCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn2, 0, deleteCmd, deadline); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn3, 0, addCmd, deadline); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn1, 1, deleteCmd, deadline); - ContainerID cid = new ContainerID(1); + ContainerID cid = ContainerID.valueOf(1); List ops = pendingOps.getPendingOps(cid); assertEquals(4, ops.size()); for (ContainerReplicaOp op : ops) { @@ -240,17 +240,17 @@ public void testRemoveExpiredEntries() { long expiry = clock.millis() + 1000; long laterExpiry = clock.millis() + 2000; long latestExpiry = clock.millis() + 3000; - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn1, 0, deleteCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 0, addCmd, expiry); - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn2, 0, deleteCmd, laterExpiry); - pendingOps.scheduleAddReplica(new ContainerID(1), dn3, 0, addCmd, laterExpiry); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn1, 1, deleteCmd, latestExpiry); - pendingOps.scheduleAddReplica(new ContainerID(2), dn1, 1, addCmd, latestExpiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn1, 0, deleteCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 0, addCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn2, 0, deleteCmd, laterExpiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn3, 0, addCmd, laterExpiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn1, 1, deleteCmd, latestExpiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(2), dn1, 1, addCmd, latestExpiry); List ops = - pendingOps.getPendingOps(new ContainerID(1)); + 
pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(4, ops.size()); - ops = pendingOps.getPendingOps(new ContainerID(2)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(2)); assertEquals(2, ops.size()); // Some entries expire at "start + 1000" some at start + 2000 and @@ -258,13 +258,13 @@ public void testRemoveExpiredEntries() { clock.fastForward(1000); pendingOps.removeExpiredEntries(); // Nothing is remove as no deadline is older than the current clock time. - ops = pendingOps.getPendingOps(new ContainerID(1)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(4, ops.size()); clock.fastForward(1000); pendingOps.removeExpiredEntries(); // Those ADD with deadline + 1000 should be removed, but deletes are retained - ops = pendingOps.getPendingOps(new ContainerID(1)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(3, ops.size()); // We should lose the entries for DN1 assertFalse(isOpPresent(ops, dn1, 0, ADD)); @@ -275,19 +275,19 @@ public void testRemoveExpiredEntries() { pendingOps.removeExpiredEntries(); // Now should only have entries for container 2 and the deletes for container 1 - ops = pendingOps.getPendingOps(new ContainerID(1)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(1)); assertEquals(2, ops.size()); assertTrue(isOpPresent(ops, dn1, 0, DELETE)); assertTrue(isOpPresent(ops, dn2, 0, DELETE)); - ops = pendingOps.getPendingOps(new ContainerID(2)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(2)); assertEquals(2, ops.size()); // Advance the clock again and all should be removed except deletes clock.fastForward(1000); pendingOps.removeExpiredEntries(); - ops = pendingOps.getPendingOps(new ContainerID(2)); + ops = pendingOps.getPendingOps(ContainerID.valueOf(2)); assertTrue(isOpPresent(ops, dn1, 1, DELETE)); assertEquals(1, ops.size()); } @@ -301,12 +301,12 @@ private boolean isOpPresent(List ops, DatanodeDetails dn, @Test public void testReplicationMetrics() { long expiry = clock.millis() + 1000; - pendingOps.scheduleDeleteReplica(new ContainerID(1), dn1, 1, deleteCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(1), dn1, 2, addCmd, expiry); - pendingOps.scheduleDeleteReplica(new ContainerID(2), dn2, 1, deleteCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(2), dn3, 1, addCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(3), dn3, 0, addCmd, expiry); - pendingOps.scheduleDeleteReplica(new ContainerID(4), dn3, 0, deleteCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(1), dn1, 1, deleteCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(1), dn1, 2, addCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(2), dn2, 1, deleteCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(2), dn3, 1, addCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(3), dn3, 0, addCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(4), dn3, 0, deleteCmd, expiry); // InFlight Replication and Deletion assertEquals(3, pendingOps.getPendingOpCount(ADD)); @@ -327,32 +327,32 @@ public void testReplicationMetrics() { assertEquals(metrics.getReplicaDeleteTimeoutTotal(), 1); expiry = clock.millis() + 1000; - pendingOps.scheduleDeleteReplica(new ContainerID(3), dn1, 2, deleteCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(3), dn1, 3, addCmd, expiry); - pendingOps.scheduleDeleteReplica(new ContainerID(4), dn2, 2, deleteCmd, expiry); - pendingOps.scheduleAddReplica(new ContainerID(4), dn3, 4, addCmd, 
expiry); - pendingOps.scheduleAddReplica(new ContainerID(5), dn3, 0, addCmd, expiry); - pendingOps.scheduleDeleteReplica(new ContainerID(6), dn3, 0, deleteCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(3), dn1, 2, deleteCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(3), dn1, 3, addCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(4), dn2, 2, deleteCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(4), dn3, 4, addCmd, expiry); + pendingOps.scheduleAddReplica(ContainerID.valueOf(5), dn3, 0, addCmd, expiry); + pendingOps.scheduleDeleteReplica(ContainerID.valueOf(6), dn3, 0, deleteCmd, expiry); // InFlight Replication and Deletion. Previous Inflight should be // removed as they were timed out, but deletes are retained assertEquals(3, pendingOps.getPendingOpCount(ADD)); assertEquals(6, pendingOps.getPendingOpCount(DELETE)); - pendingOps.completeDeleteReplica(new ContainerID(3), dn1, 2); - pendingOps.completeAddReplica(new ContainerID(3), dn1, 3); - pendingOps.completeDeleteReplica(new ContainerID(4), dn2, 2); - pendingOps.completeAddReplica(new ContainerID(4), dn3, 4); - pendingOps.completeDeleteReplica(new ContainerID(6), dn3, 0); - pendingOps.completeAddReplica(new ContainerID(5), dn3, 0); + pendingOps.completeDeleteReplica(ContainerID.valueOf(3), dn1, 2); + pendingOps.completeAddReplica(ContainerID.valueOf(3), dn1, 3); + pendingOps.completeDeleteReplica(ContainerID.valueOf(4), dn2, 2); + pendingOps.completeAddReplica(ContainerID.valueOf(4), dn3, 4); + pendingOps.completeDeleteReplica(ContainerID.valueOf(6), dn3, 0); + pendingOps.completeAddReplica(ContainerID.valueOf(5), dn3, 0); assertEquals(metrics.getEcReplicasCreatedTotal(), 2); assertEquals(metrics.getEcReplicasDeletedTotal(), 2); assertEquals(metrics.getReplicasCreatedTotal(), 1); assertEquals(metrics.getReplicasDeletedTotal(), 1); - pendingOps.completeDeleteReplica(new ContainerID(3), dn1, 2); - pendingOps.completeAddReplica(new ContainerID(2), dn1, 3); + pendingOps.completeDeleteReplica(ContainerID.valueOf(3), dn1, 2); + pendingOps.completeAddReplica(ContainerID.valueOf(2), dn1, 3); // Checking pendingOpCount doesn't go below zero assertEquals(0, pendingOps.getPendingOpCount(ADD)); @@ -374,7 +374,7 @@ public void testNotifySubscribers() { pendingOps.registerSubscriber(subscriber2); // schedule an ADD and a DELETE - ContainerID containerID = new ContainerID(1); + ContainerID containerID = ContainerID.valueOf(1); pendingOps.scheduleAddReplica(containerID, dn1, 0, addCmd, deadline); ContainerReplicaOp addOp = pendingOps.getPendingOps(containerID).get(0); pendingOps.scheduleDeleteReplica(containerID, dn1, 0, deleteCmd, deadline); @@ -411,7 +411,7 @@ public void testNotifySubscribers() { @Test public void subscribersShouldNotBeNotifiedWhenOpsHaveNotExpired() { - ContainerID containerID = new ContainerID(1); + ContainerID containerID = ContainerID.valueOf(1); // schedule ops pendingOps.scheduleDeleteReplica(containerID, dn1, 0, deleteCmd, deadline); @@ -431,7 +431,7 @@ public void subscribersShouldNotBeNotifiedWhenOpsHaveNotExpired() { @Test public void subscribersShouldNotBeNotifiedWhenReplacingAnOpWithDuplicate() { - ContainerID containerID = new ContainerID(1); + ContainerID containerID = ContainerID.valueOf(1); // schedule ops pendingOps.scheduleAddReplica(containerID, dn2, 0, addCmd, deadline); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java new file mode 100644 index 00000000000..20f6de21b03 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Test for QuasiClosedStuckOverReplicationHandler. 
+ */ +public class TestQuasiClosedStuckOverReplicationHandler { + + private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = RatisReplicationConfig.getInstance(THREE); + private ContainerInfo container; + private ReplicationManager replicationManager; + private ReplicationManagerMetrics metrics; + private Set>> commandsSent; + private QuasiClosedStuckOverReplicationHandler handler; + private UUID origin1 = UUID.randomUUID(); + private UUID origin2 = UUID.randomUUID(); + + @BeforeEach + void setup() throws NodeNotFoundException, + CommandTargetOverloadedException, NotLeaderException { + container = ReplicationTestUtil.createContainer( + HddsProtos.LifeCycleState.QUASI_CLOSED, RATIS_REPLICATION_CONFIG); + + replicationManager = mock(ReplicationManager.class); + OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + ozoneConfiguration.setBoolean("hdds.scm.replication.push", true); + when(replicationManager.getConfig()) + .thenReturn(ozoneConfiguration.getObject( + ReplicationManager.ReplicationManagerConfiguration.class)); + metrics = ReplicationManagerMetrics.create(replicationManager); + when(replicationManager.getMetrics()).thenReturn(metrics); + + /* + Return NodeStatus with NodeOperationalState as specified in + DatanodeDetails, and NodeState as HEALTHY. + */ + when( + replicationManager.getNodeStatus(any(DatanodeDetails.class))) + .thenAnswer(invocationOnMock -> { + DatanodeDetails dn = invocationOnMock.getArgument(0); + return new NodeStatus(dn.getPersistedOpState(), + HddsProtos.NodeState.HEALTHY); + }); + + commandsSent = new HashSet<>(); + ReplicationTestUtil.mockRMSendThrottledDeleteCommand( + replicationManager, commandsSent); + handler = new QuasiClosedStuckOverReplicationHandler(replicationManager); + } + + @Test + public void testReturnsZeroIfNotOverReplicated() throws IOException { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + + int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1); + assertEquals(0, count); + } + + @Test + public void testNoCommandsScheduledIfPendingOps() throws IOException { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + List pendingOps = new ArrayList<>(); + pendingOps.add(ContainerReplicaOp.create( + ContainerReplicaOp.PendingOpType.DELETE, MockDatanodeDetails.randomDatanodeDetails(), 0)); + + int count = handler.processAndSendCommands(replicas, pendingOps, getOverReplicatedHealthResult(), 1); + assertEquals(0, count); + } + + @Test + public void testCommandScheduledForOverReplicatedContainer() throws IOException { + Set replicas = 
ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + + int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1); + assertEquals(1, count); + SCMCommand command = commandsSent.iterator().next().getRight(); + assertEquals(StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.deleteContainerCommand, command.getType()); + } + + @Test + public void testOverloadedExceptionContinuesAndThrows() throws NotLeaderException, CommandTargetOverloadedException { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + + ReplicationTestUtil.mockRMSendThrottledDeleteCommand(replicationManager, commandsSent, new AtomicBoolean(true)); + + assertThrows(CommandTargetOverloadedException.class, () -> + handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1)); + assertEquals(1, commandsSent.size()); + } + + + private ContainerHealthResult.OverReplicatedHealthResult getOverReplicatedHealthResult() { + ContainerHealthResult.OverReplicatedHealthResult + healthResult = mock(ContainerHealthResult.OverReplicatedHealthResult.class); + when(healthResult.getContainerInfo()).thenReturn(container); + return healthResult; + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckReplicaCount.java new file mode 100644 index 00000000000..2e19e788509 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckReplicaCount.java @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED; +import static org.apache.ratis.util.Preconditions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Tests for the QuasiClosedStuckReplicaCount class. + */ +public class TestQuasiClosedStuckReplicaCount { + + private UUID origin1; + private UUID origin2; + private UUID origin3; + + @BeforeEach + public void setUp() { + origin1 = UUID.randomUUID(); + origin2 = UUID.randomUUID(); + origin3 = UUID.randomUUID(); + } + + @Test + public void testCorrectlyReplicationWithThreeOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testCorrectReplicationWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testCorrectReplicationWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testUnderReplicationWithThreeOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), 
Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 1, origin3); + } + + @Test + public void testUnderReplicationWithThreeOriginsTwoUnderReplicated() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + List misReplicatedOrigins = + replicaCount.getUnderReplicatedReplicas(); + assertTrue(misReplicatedOrigins.size() == 2); + + for (QuasiClosedStuckReplicaCount.MisReplicatedOrigin misReplicatedOrigin : misReplicatedOrigins) { + UUID source = misReplicatedOrigin.getSources().iterator().next().getOriginDatanodeId(); + assertTrue(source.equals(origin1) || source.equals(origin3)); + } + } + + @Test + public void testUnderReplicationWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 1, origin2); + } + + @Test + public void testUnderReplicationWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 2, origin1); + } + + @Test + public void testOverReplicationWithThreeOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 3, 1, origin3); + } + + @Test + public void testOverReplicationWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + 
validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 3, 1, origin2); + } + + @Test + public void testOverReplicationWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 4, 1, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithThreeOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, DECOMMISSIONING), Pair.of(origin1, DECOMMISSIONING), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 2, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONING), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 1, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONING), Pair.of(origin1, DECOMMISSIONING)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 3, 2, origin1); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithThreeOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONED), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONED), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + 
QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin1, DECOMMISSIONED)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testUnderReplicationWithMaintenanceWithOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE), Pair.of(origin1, ENTERING_MAINTENANCE)); + + replicaCount = new QuasiClosedStuckReplicaCount(replicas, 2); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 3, 1, origin1); + } + + @Test + public void testUnderReplicationWithMaintenanceWithTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, ENTERING_MAINTENANCE), Pair.of(origin1, ENTERING_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 1, origin1); + } + + @Test + public void testNoOverReplicationWithExcessMaintenanceReplicasTwoOrigins() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithExcessMaintenanceReplicasOneOrigin() { + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, 
IN_SERVICE),
+        Pair.of(origin1, IN_MAINTENANCE));
+
+    QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1);
+    assertFalse(replicaCount.isUnderReplicated());
+    assertFalse(replicaCount.isOverReplicated());
+  }
+
+  private void validateMisReplicatedOrigins(
+      List<QuasiClosedStuckReplicaCount.MisReplicatedOrigin> misReplicatedOrigins,
+      int expectedUnderRepOrigins, int expectedSources, int expectedDelta, UUID expectedOrigin) {
+
+    assertEquals(expectedUnderRepOrigins, misReplicatedOrigins.size());
+    Set<ContainerReplica> sources = misReplicatedOrigins.get(0).getSources();
+    assertEquals(expectedSources, sources.size());
+    for (ContainerReplica source : sources) {
+      assertEquals(expectedOrigin, source.getOriginDatanodeId());
+    }
+    assertEquals(expectedDelta, misReplicatedOrigins.get(0).getReplicaDelta());
+  }
+
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckUnderReplicationHandler.java
new file mode 100644
index 00000000000..2c46aea502c
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckUnderReplicationHandler.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.scm.PlacementPolicy;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.exceptions.SCMException;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.scm.pipeline.InsufficientDatanodesException;
+import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.ratis.protocol.exceptions.NotLeaderException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Test for QuasiClosedStuckUnderReplicationHandler.
+ */
+public class TestQuasiClosedStuckUnderReplicationHandler {
+
+  private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = RatisReplicationConfig.getInstance(THREE);
+  private ContainerInfo container;
+  private NodeManager nodeManager;
+  private OzoneConfiguration conf;
+  private ReplicationManager replicationManager;
+  private ReplicationManagerMetrics metrics;
+  private PlacementPolicy policy;
+  private Set<Pair<UUID, SCMCommand<?>>> commandsSent;
+  private QuasiClosedStuckUnderReplicationHandler handler;
+
+
+  @BeforeEach
+  void setup(@TempDir File testDir) throws NodeNotFoundException,
+      CommandTargetOverloadedException, NotLeaderException {
+    container = ReplicationTestUtil.createContainer(
+        HddsProtos.LifeCycleState.QUASI_CLOSED, RATIS_REPLICATION_CONFIG);
+
+    nodeManager = mock(NodeManager.class);
+    conf = SCMTestUtils.getConf(testDir);
+    policy = ReplicationTestUtil
+        .getSimpleTestPlacementPolicy(nodeManager, conf);
+    replicationManager = mock(ReplicationManager.class);
+    OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
+    ozoneConfiguration.setBoolean("hdds.scm.replication.push", true);
+    when(replicationManager.getConfig())
+        .thenReturn(ozoneConfiguration.getObject(
+            ReplicationManager.ReplicationManagerConfiguration.class));
+    metrics = ReplicationManagerMetrics.create(replicationManager);
+    when(replicationManager.getMetrics()).thenReturn(metrics);
+
+    /*
+     Return NodeStatus with NodeOperationalState as specified in
+     DatanodeDetails, and NodeState as HEALTHY.
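+     This lets each test control how the handler sees a node simply by
+     setting the persisted operational state on the replica's DatanodeDetails.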
+ */ + when( + replicationManager.getNodeStatus(any(DatanodeDetails.class))) + .thenAnswer(invocationOnMock -> { + DatanodeDetails dn = invocationOnMock.getArgument(0); + return new NodeStatus(dn.getPersistedOpState(), + HddsProtos.NodeState.HEALTHY); + }); + + commandsSent = new HashSet<>(); + ReplicationTestUtil.mockRMSendThrottleReplicateCommand( + replicationManager, commandsSent, new AtomicBoolean(false)); + ReplicationTestUtil.mockRMSendDatanodeCommand(replicationManager, + commandsSent); + ReplicationTestUtil.mockRMSendDeleteCommand(replicationManager, + commandsSent); + handler = new QuasiClosedStuckUnderReplicationHandler(policy, conf, + replicationManager); + } + + @Test + public void testReturnsZeroIfNotUnderReplicated() throws IOException { + UUID origin = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE)); + + int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getUnderReplicatedHealthResult(), 1); + assertEquals(0, count); + } + + @Test + public void testNoCommandsScheduledIfPendingOps() throws IOException { + UUID origin = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE)); + List pendingOps = new ArrayList<>(); + pendingOps.add(ContainerReplicaOp.create( + ContainerReplicaOp.PendingOpType.ADD, MockDatanodeDetails.randomDatanodeDetails(), 0)); + + int count = handler.processAndSendCommands(replicas, pendingOps, getUnderReplicatedHealthResult(), 1); + assertEquals(0, count); + } + + @Test + public void testCommandScheduledForUnderReplicatedContainer() throws IOException { + UUID origin = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin, HddsProtos.NodeOperationalState.IN_SERVICE)); + + int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getUnderReplicatedHealthResult(), 1); + assertEquals(2, count); + ReplicationTestUtil.mockRMSendThrottleReplicateCommand(replicationManager, commandsSent, new AtomicBoolean(true)); + } + + @Test + public void testOverloadedExceptionContinuesAndThrows() throws NotLeaderException, CommandTargetOverloadedException { + UUID origin1 = UUID.randomUUID(); + UUID origin2 = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + + ReplicationTestUtil.mockRMSendThrottleReplicateCommand(replicationManager, commandsSent, new AtomicBoolean(true)); + + assertThrows(CommandTargetOverloadedException.class, () -> + handler.processAndSendCommands(replicas, Collections.emptyList(), getUnderReplicatedHealthResult(), 1)); + assertEquals(1, commandsSent.size()); + } + + @Test + 
public void testInsufficientNodesExceptionThrown() { + UUID origin1 = UUID.randomUUID(); + UUID origin2 = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE), + Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE)); + + policy = ReplicationTestUtil.getNoNodesTestPlacementPolicy(nodeManager, conf); + handler = new QuasiClosedStuckUnderReplicationHandler(policy, conf, replicationManager); + + assertThrows(SCMException.class, () -> + handler.processAndSendCommands(replicas, Collections.emptyList(), getUnderReplicatedHealthResult(), 1)); + assertEquals(0, commandsSent.size()); + } + + @Test + public void testPartialReplicationExceptionThrown() { + UUID origin1 = UUID.randomUUID(); + Set replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(), + StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED, + Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE)); + + policy = ReplicationTestUtil.getInsufficientNodesTestPlacementPolicy(nodeManager, conf, 2); + handler = new QuasiClosedStuckUnderReplicationHandler(policy, conf, replicationManager); + + assertThrows(InsufficientDatanodesException.class, () -> + handler.processAndSendCommands(replicas, Collections.emptyList(), getUnderReplicatedHealthResult(), 1)); + assertEquals(1, commandsSent.size()); + } + + private ContainerHealthResult.UnderReplicatedHealthResult getUnderReplicatedHealthResult() { + ContainerHealthResult.UnderReplicatedHealthResult + healthResult = mock(ContainerHealthResult.UnderReplicatedHealthResult.class); + when(healthResult.getContainerInfo()).thenReturn(container); + return healthResult; + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index e0a4130021d..7555e1ab88b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -39,7 +39,6 @@ import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.anyList; -import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.clearInvocations; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.eq; @@ -49,7 +48,6 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.google.common.collect.ImmutableList; import com.google.protobuf.Proto2Utils; import java.io.IOException; import java.time.Instant; @@ -462,104 +460,6 @@ public void testQuasiClosedContainerWithUnhealthyReplicaOnUniqueOrigin() replicas.add(unhealthy); storeContainerAndReplicas(container, replicas); - replicationManager.processContainer(container, repQueue, repReport); - assertEquals(0, repReport.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - assertEquals(0, repReport.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - assertEquals(0, repQueue.underReplicatedQueueSize()); - assertEquals(0, repQueue.overReplicatedQueueSize()); - } - - @Test - public void 
testQuasiClosedContainerWithVulnerableUnhealthyReplica() - throws IOException, NodeNotFoundException { - RatisReplicationConfig ratisRepConfig = - RatisReplicationConfig.getInstance(THREE); - long sequenceID = 10; - ContainerInfo container = createContainerInfo(ratisRepConfig, 1, - HddsProtos.LifeCycleState.QUASI_CLOSED, sequenceID); - - // this method creates replicas with same origin id and zero sequence id - Set replicas = - createReplicasWithSameOrigin(container.containerID(), - ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0); - replicas.add(createContainerReplica(container.containerID(), 0, - IN_SERVICE, ContainerReplicaProto.State.UNHEALTHY, sequenceID)); - ContainerReplica decommissioning = - createContainerReplica(container.containerID(), 0, DECOMMISSIONING, - ContainerReplicaProto.State.UNHEALTHY, sequenceID); - replicas.add(decommissioning); - storeContainerAndReplicas(container, replicas); - when(replicationManager.getNodeStatus(any(DatanodeDetails.class))) - .thenAnswer(invocation -> { - DatanodeDetails dn = invocation.getArgument(0); - if (dn.equals(decommissioning.getDatanodeDetails())) { - return new NodeStatus(DECOMMISSIONING, HddsProtos.NodeState.HEALTHY); - } - - return NodeStatus.inServiceHealthy(); - }); - - replicationManager.processContainer(container, repQueue, repReport); - assertEquals(1, repReport.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - assertEquals(0, repReport.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - assertEquals(1, repQueue.underReplicatedQueueSize()); - assertEquals(0, repQueue.overReplicatedQueueSize()); - - when(ratisPlacementPolicy.chooseDatanodes(anyList(), anyList(), eq(null), eq(1), anyLong(), - anyLong())).thenAnswer(invocation -> ImmutableList.of(MockDatanodeDetails.randomDatanodeDetails())); - when(nodeManager.getTotalDatanodeCommandCounts(any(DatanodeDetails.class), any(), any())) - .thenAnswer(invocation -> { - Map map = new HashMap<>(); - map.put(SCMCommandProto.Type.replicateContainerCommand, 0); - map.put(SCMCommandProto.Type.reconstructECContainersCommand, 0); - return map; - }); - RatisUnderReplicationHandler handler = - new RatisUnderReplicationHandler(ratisPlacementPolicy, configuration, replicationManager); - - handler.processAndSendCommands(replicas, Collections.emptyList(), repQueue.dequeueUnderReplicatedContainer(), 2); - assertEquals(1, commandsSent.size()); - Pair> command = commandsSent.iterator().next(); - assertEquals(SCMCommandProto.Type.replicateContainerCommand, command.getValue().getType()); - assertEquals(decommissioning.getDatanodeDetails().getUuid(), command.getKey()); - } - - - /** - * There is a QUASI_CLOSED container with some UNHEALTHY replicas on unique origin nodes. If the datanode hosting - * one such replica is being taken offline, then the UNHEALTHY replica needs to be replicated to another node. 
- */ - @Test - public void testQuasiClosedContainerWithUnhealthyReplicaOnDecommissioningNodeWithUniqueOrigin() - throws IOException, NodeNotFoundException { - RatisReplicationConfig ratisRepConfig = - RatisReplicationConfig.getInstance(THREE); - // create a QUASI_CLOSED container with 3 QUASI_CLOSED replicas on same origin, and 1 UNHEALTHY on unique origin - ContainerInfo container = createContainerInfo(ratisRepConfig, 1, - HddsProtos.LifeCycleState.QUASI_CLOSED); - Set replicas = - createReplicasWithSameOrigin(container.containerID(), - ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0); - ContainerReplica unhealthy = - createContainerReplica(container.containerID(), 0, DECOMMISSIONING, - ContainerReplicaProto.State.UNHEALTHY); - replicas.add(unhealthy); - storeContainerAndReplicas(container, replicas); - when(replicationManager.getNodeStatus(any(DatanodeDetails.class))) - .thenAnswer(invocation -> { - DatanodeDetails dn = invocation.getArgument(0); - if (dn.equals(unhealthy.getDatanodeDetails())) { - return new NodeStatus(DECOMMISSIONING, HddsProtos.NodeState.HEALTHY); - } - - return NodeStatus.inServiceHealthy(); - }); - - // the container should be under replicated and queued to under replication queue replicationManager.processContainer(container, repQueue, repReport); assertEquals(1, repReport.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); @@ -567,26 +467,6 @@ public void testQuasiClosedContainerWithUnhealthyReplicaOnDecommissioningNodeWit ReplicationManagerReport.HealthState.OVER_REPLICATED)); assertEquals(1, repQueue.underReplicatedQueueSize()); assertEquals(0, repQueue.overReplicatedQueueSize()); - - // next, this test sets up some mocks to test if RatisUnderReplicationHandler will handle this container correctly - when(ratisPlacementPolicy.chooseDatanodes(anyList(), anyList(), eq(null), eq(1), anyLong(), - anyLong())).thenAnswer(invocation -> ImmutableList.of(MockDatanodeDetails.randomDatanodeDetails())); - when(nodeManager.getTotalDatanodeCommandCounts(any(DatanodeDetails.class), any(), any())) - .thenAnswer(invocation -> { - Map map = new HashMap<>(); - map.put(SCMCommandProto.Type.replicateContainerCommand, 0); - map.put(SCMCommandProto.Type.reconstructECContainersCommand, 0); - return map; - }); - RatisUnderReplicationHandler handler = - new RatisUnderReplicationHandler(ratisPlacementPolicy, configuration, replicationManager); - - handler.processAndSendCommands(replicas, Collections.emptyList(), repQueue.dequeueUnderReplicatedContainer(), 2); - assertEquals(1, commandsSent.size()); - Pair> command = commandsSent.iterator().next(); - // a replicate command should have been sent for the UNHEALTHY replica - assertEquals(SCMCommandProto.Type.replicateContainerCommand, command.getValue().getType()); - assertEquals(unhealthy.getDatanodeDetails().getUuid(), command.getKey()); } /** @@ -1431,7 +1311,7 @@ public void testSendLowPriorityReplicateContainerCommand() replicationManager.sendLowPriorityReplicateContainerCommand(containerInfo, 0, src, target, scmDeadline); - ArgumentCaptor command = + ArgumentCaptor> command = ArgumentCaptor.forClass(SCMCommand.class); ArgumentCaptor targetUUID = ArgumentCaptor.forClass(UUID.class); @@ -1755,12 +1635,12 @@ public void testPendingOpExpiry() throws ContainerNotFoundException { ContainerReplicaOp delOp = new ContainerReplicaOp( ContainerReplicaOp.PendingOpType.DELETE, dn2, 1, command, commandDeadline); - replicationManager.opCompleted(addOp, new ContainerID(1L), false); - replicationManager.opCompleted(delOp, new 
ContainerID(1L), false); + replicationManager.opCompleted(addOp, ContainerID.valueOf(1L), false); + replicationManager.opCompleted(delOp, ContainerID.valueOf(1L), false); // No commands should be sent for either of the above ops. assertEquals(0, commandsSent.size()); - replicationManager.opCompleted(delOp, new ContainerID(1L), true); + replicationManager.opCompleted(delOp, ContainerID.valueOf(1L), true); assertEquals(1, commandsSent.size()); Pair> sentCommand = commandsSent.iterator().next(); // The target should be DN2 and the deadline should have been updated from the value set in commandDeadline above diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java index ac529b08f7f..79f6299bace 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java @@ -130,6 +130,7 @@ private static List getTestFiles() throws URISyntaxException { private static List loadTestsInFile(URI testFile) throws IOException { + System.out.println("Loading test file: " + testFile); ObjectReader reader = new ObjectMapper().readerFor(Scenario.class); try (InputStream stream = testFile.toURL().openStream()) { try (MappingIterator iterator = reader.readValues(stream)) { @@ -332,11 +333,10 @@ private void assertExpectedCommands(Scenario scenario, boolean found = false; for (Pair> command : commandsSent) { if (command.getRight().getType() == expectedCommand.getType()) { - DatanodeDetails targetDatanode = expectedCommand.getTargetDatanode(); - if (targetDatanode != null) { + if (expectedCommand.hasExpectedDatanode()) { // We need to assert against the command the datanode is sent to DatanodeDetails commandDatanode = findDatanodeFromUUID(command.getKey()); - if (commandDatanode != null && commandDatanode.equals(targetDatanode)) { + if (commandDatanode != null && expectedCommand.isTargetExpected(commandDatanode)) { found = true; commandsSent.remove(command); break; @@ -452,7 +452,7 @@ public ContainerReplica buildContainerReplica() { ContainerReplica.ContainerReplicaBuilder builder = new ContainerReplica.ContainerReplicaBuilder(); return builder.setReplicaIndex(index) - .setContainerID(new ContainerID(containerId)) + .setContainerID(ContainerID.valueOf(containerId)) .setContainerState(state) .setSequenceId(sequenceId) .setDatanodeDetails(datanodeDetails) @@ -550,6 +550,7 @@ public int getOverReplicatedQueue() { public static class ExpectedCommands { private SCMCommandProto.Type type; private String datanode; + private Set expectedDatanodes; public void setDatanode(String datanode) { this.datanode = datanode; @@ -564,15 +565,33 @@ public SCMCommandProto.Type getType() { return type; } - public DatanodeDetails getTargetDatanode() { + public boolean hasExpectedDatanode() { + createExpectedDatanodes(); + return !expectedDatanodes.isEmpty(); + } + + public boolean isTargetExpected(DatanodeDetails dn) { + createExpectedDatanodes(); + return expectedDatanodes.contains(dn); + } + + private void createExpectedDatanodes() { + if (expectedDatanodes != null) { + return; + } + this.expectedDatanodes = new HashSet<>(); if (datanode == null) { - return null; + return; } - DatanodeDetails datanodeDetails = 
DATANODE_ALIASES.get(this.datanode); - if (datanodeDetails == null) { - fail("Unable to find a datanode for the alias: " + datanode + " in the expected commands."); + String[] nodes = datanode.split("\\|"); + for (String n : nodes) { + DatanodeDetails dn = DATANODE_ALIASES.get(n); + if (dn != null) { + expectedDatanodes.add(dn); + } else { + fail("Expected datanode not found: " + datanode); + } } - return datanodeDetails; } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestQuasiClosedStuckReplicationCheck.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestQuasiClosedStuckReplicationCheck.java new file mode 100644 index 00000000000..6a6dd84243d --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestQuasiClosedStuckReplicationCheck.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication.health; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; +import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationQueue; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + + +/** + * Tests for the QuasiClosedStuckReplicationCheck class. 
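+ * Covers the cases where the check should skip a container (not QUASI_CLOSED,
+ * not stuck, or already handled by pending replica ops) as well as the
+ * under- and over-replicated cases that must be reported and queued.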
+ */ +public class TestQuasiClosedStuckReplicationCheck { + + private QuasiClosedStuckReplicationCheck handler; + private final UUID origin1 = UUID.randomUUID(); + private final UUID origin2 = UUID.randomUUID(); + private final UUID origin3 = UUID.randomUUID(); + private ReplicationManagerReport report; + private ReplicationQueue queue; + + @BeforeEach + public void setup() { + handler = new QuasiClosedStuckReplicationCheck(); + report = new ReplicationManagerReport(); + queue = new ReplicationQueue(); + } + + @Test + public void testClosedContainerReturnsFalse() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE)); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(new ReplicationManagerReport()) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertFalse(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testQuasiClosedNotStuckReturnsFalse() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin2, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertFalse(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testQuasiClosedStuckWithOpenReturnsFalse() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + containerReplicas.addAll(ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.OPEN, + Pair.of(origin3, IN_SERVICE))); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertFalse(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, 
queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testCorrectlyReplicated() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertFalse(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testNoReplicasReturnsTrue() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = new HashSet<>(); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertTrue(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.MISSING)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testUnderReplicatedOneOriginNotHandled() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE)); + + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertFalse(handler.handle(request)); + } + + @Test + public void testUnderReplicatedWithPendingAddIsNotQueued() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + List pendingOps = new ArrayList<>(); + pendingOps.add(new ContainerReplicaOp( + ContainerReplicaOp.PendingOpType.ADD, MockDatanodeDetails.randomDatanodeDetails(), 0, null, Long.MAX_VALUE)); + + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + 
.setReplicationQueue(queue) + .setPendingOps(pendingOps) + .build(); + + assertTrue(handler.handle(request)); + assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.MISSING)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + + @Test + public void testOverReplicatedIsQueued() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .build(); + + assertTrue(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.MISSING)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(1, queue.overReplicatedQueueSize()); + } + + @Test + public void testOverReplicatedWithPendingDeleteIsNotQueued() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1, QUASI_CLOSED); + + Set containerReplicas = ReplicationTestUtil + .createReplicasWithOriginAndOpState(containerInfo.containerID(), State.QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + List pendingOps = new ArrayList<>(); + pendingOps.add(new ContainerReplicaOp( + ContainerReplicaOp.PendingOpType.DELETE, MockDatanodeDetails.randomDatanodeDetails(), 0, null, Long.MAX_VALUE)); + + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(report) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReplicationQueue(queue) + .setPendingOps(pendingOps) + .build(); + + assertTrue(handler.handle(request)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.MISSING)); + assertEquals(0, queue.underReplicatedQueueSize()); + assertEquals(0, queue.overReplicatedQueueSize()); + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerAttribute.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerAttribute.java index acf58ded0be..8f321c85746 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerAttribute.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerAttribute.java @@ -23,8 +23,9 @@ import static 
org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.util.NavigableSet;
+import java.util.NavigableMap;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.junit.jupiter.api.Test;
@@ -43,24 +44,21 @@ static <T extends Enum<T>> boolean hasContainerID(ContainerAttribute<T> attribut
   }
   static <T extends Enum<T>> boolean hasContainerID(ContainerAttribute<T> attribute, T key, ContainerID id) {
-    final NavigableSet<ContainerID> set = attribute.get(key);
-    return set != null && set.contains(id);
+    final NavigableMap<ContainerID, ContainerInfo> map = attribute.get(key);
+    return map != null && map.containsKey(id);
   }
   @Test
-  public void testInsert() {
+  public void testAddNonExisting() {
     ContainerAttribute<Key> containerAttribute = new ContainerAttribute<>(Key.class);
-    ContainerID id = ContainerID.valueOf(42);
-    containerAttribute.insert(key1, id);
+    ContainerInfo info = new ContainerInfo.Builder().setContainerID(42).build();
+    ContainerID id = info.containerID();
+    containerAttribute.addNonExisting(key1, info);
     assertEquals(1, containerAttribute.getCollection(key1).size());
-    assertThat(containerAttribute.getCollection(key1)).contains(id);
+    assertThat(containerAttribute.get(key1)).containsKey(id);
-    // Insert again and verify that the new ContainerId is inserted.
-    ContainerID newId =
-        ContainerID.valueOf(42);
-    containerAttribute.insert(key1, newId);
-    assertEquals(1, containerAttribute.getCollection(key1).size());
-    assertThat(containerAttribute.getCollection(key1)).contains(newId);
+    // Adding it again should fail.
+    assertThrows(IllegalStateException.class, () -> containerAttribute.addNonExisting(key1, info));
   }
   @Test
@@ -68,7 +66,8 @@ public void testClearSet() {
     ContainerAttribute<Key> containerAttribute = new ContainerAttribute<>(Key.class);
     for (Key k : Key.values()) {
       for (int x = 1; x < 101; x++) {
-        containerAttribute.insert(k, ContainerID.valueOf(x));
+        ContainerInfo info = new ContainerInfo.Builder().setContainerID(x).build();
+        containerAttribute.addNonExisting(k, info);
       }
     }
     for (Key k : Key.values()) {
@@ -85,7 +84,8 @@ public void testRemove() {
     for (Key k : Key.values()) {
       for (int x = 1; x < 101; x++) {
-        containerAttribute.insert(k, ContainerID.valueOf(x));
+        ContainerInfo info = new ContainerInfo.Builder().setContainerID(x).build();
+        containerAttribute.addNonExisting(k, info);
       }
     }
     for (int x = 1; x < 101; x += 2) {
@@ -106,9 +106,10 @@ public void testRemove() {
   @Test
   public void tesUpdate() throws SCMException {
     ContainerAttribute<Key> containerAttribute = new ContainerAttribute<>(Key.class);
-    ContainerID id = ContainerID.valueOf(42);
+    ContainerInfo info = new ContainerInfo.Builder().setContainerID(42).build();
+    ContainerID id = info.containerID();
-    containerAttribute.insert(key1, id);
+    containerAttribute.addNonExisting(key1, info);
     assertTrue(hasContainerID(containerAttribute, key1, id));
     assertFalse(hasContainerID(containerAttribute, key2, id));
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestCommandQueue.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestCommandQueue.java
index a917bc02251..d47a37f2631 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestCommandQueue.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestCommandQueue.java
@@ -101,7 +101,7 @@ public void testSummaryUpdated() {
         datanode2UUID,
SCMCommandProto.Type.createPipelineCommand)); // Ensure the counts are cleared when the commands are retrieved - List cmds = commandQueue.getCommand(datanode1UUID); + List> cmds = commandQueue.getCommand(datanode1UUID); assertEquals(5, cmds.size()); assertEquals(0, commandQueue.getDatanodeCommandCount( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index 4a9765c0c80..ef40c6b814b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -260,6 +260,6 @@ private void addReplica(ContainerInfo cont, DatanodeDetails node) { .setDatanodeDetails(node) .build(); containerManager.getContainerStateManager() - .updateContainerReplica(cont.containerID(), replica); + .updateContainerReplica(replica); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 25802ddb813..cb2315f7fd5 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -644,7 +644,7 @@ public void testSetNodeOpStateAndCommandFired() // If found mismatch, leader SCM fires a SetNodeOperationalStateCommand // to update the opState persisted in Datanode. scm.getScmContext().updateLeaderAndTerm(true, 1); - List commands = nodeManager.processHeartbeat(dn); + List> commands = nodeManager.processHeartbeat(dn); assertEquals(SetNodeOperationalStateCommand.class, commands.get(0).getClass()); @@ -1763,7 +1763,7 @@ public void testHandlingSCMCommandEvent() PipelineID.randomId()))); eq.processAll(1000L); - List command = + List> command = nodemanager.processHeartbeat(datanodeDetails); // With dh registered, SCM will send create pipeline command to dn assertThat(command.size()).isGreaterThanOrEqualTo(1); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index 6eb7d7c943f..48508891f6d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -372,7 +372,7 @@ public RegisteredCommand register(DatanodeDetails dd, * @return SCMheartbeat response list */ @Override - public List processHeartbeat(DatanodeDetails dd, + public List> processHeartbeat(DatanodeDetails dd, CommandQueueReportProto commandQueueReportProto) { return null; } @@ -401,7 +401,7 @@ public void addNode(DatanodeDetails id, NodeStatus status) { } @Override - public void addDatanodeCommand(UUID dnId, SCMCommand command) { + public void addDatanodeCommand(UUID dnId, SCMCommand command) { this.commandQueue.addCommand(dnId, command); } @@ -491,7 +491,7 @@ public void onMessage(CommandForDatanode commandForDatanode, } @Override - public List getCommandQueue(UUID dnID) { + public List> getCommandQueue(UUID dnID) { return null; } diff --git 
a/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json b/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json index c9e54ded449..3e6217d5dc8 100644 --- a/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json +++ b/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json @@ -19,7 +19,8 @@ "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1}, "checkCommands": [], "commands": [ - { "type": "replicateContainerCommand" } + { "type": "replicateContainerCommand", "datanode": "d1" }, + { "type": "replicateContainerCommand", "datanode": "d2" } ] }, { "description": "Quasi-closed with 3 replicas 2 origins", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12, @@ -28,9 +29,11 @@ { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"}, { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 12, "isEmpty": false, "origin": "o2"} ], - "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1}, + "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1}, "checkCommands": [], - "commands": [] + "commands": [ + { "type": "replicateContainerCommand", "datanode": "d1" } + ] }, { "description": "Quasi-closed with 3 replicas 3 origins", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12, "replicas": [ @@ -98,9 +101,45 @@ "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, "checkCommands": [], "commands": [ - { "type": "replicateContainerCommand" } + { "type": "replicateContainerCommand", "datanode": "d1" }, + { "type": "replicateContainerCommand", "datanode": "d2" }, + { "type": "replicateContainerCommand", "datanode": "d3" } ] }, + { "description": "Quasi-Closed one Origin Correctly Replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [] + }, + { "description": "Quasi-Closed two Origins Correctly Replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, 
"overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [] + }, + { "description": "Quasi-Closed three Origins Correctly Replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "UNHEALTHY", "index": 0, "datanode": "d5", "sequenceId": 11, "isEmpty": false, "origin": "o3"}, + { "state": "UNHEALTHY", "index": 0, "datanode": "d6", "sequenceId": 11, "isEmpty": false, "origin": "o3"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [] + }, { "description": "Quasi-Closed with 3 QC and one unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, "replicas": [ { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, @@ -108,8 +147,122 @@ { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, { "state": "UNHEALTHY", "index": 0, "datanode": "d4", "sequenceId": 11, "isEmpty": false, "origin": "o3"} ], + "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "replicateContainerCommand", "datanode": "d1" }, + { "type": "replicateContainerCommand", "datanode": "d4" } + ] + }, + { "description": "Quasi-Closed 3 on one origin 1 unhealthy decommissioning", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "UNHEALTHY", "index": 0, "datanode": "d4", "sequenceId": 11, "isEmpty": false, "origin": "o3", "operationalState": "DECOMMISSIONING"} + ], + "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "replicateContainerCommand", "datanode": "d4" }, + { "type": "replicateContainerCommand", "datanode": "d4" } + ] + }, + { "description": "Quasi-Closed 3 on one origin 2 unhealthy with 1 decommissioning", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": 
"UNHEALTHY", "index": 0, "datanode": "d4", "sequenceId": 11, "isEmpty": false, "origin": "o3", "operationalState": "DECOMMISSIONING"}, + { "state": "UNHEALTHY", "index": 0, "datanode": "d5", "sequenceId": 11, "isEmpty": false, "origin": "o3"} + ], + "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "replicateContainerCommand", "datanode": "d4|d5" }] + }, + { "description": "Quasi-Closed stuck one origin over replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o1"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "deleteContainerCommand", "datanode": "d1|d2|d3|d4" } + ] + }, + { "description": "Quasi-Closed stuck two origins over replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "deleteContainerCommand", "datanode": "d1|d2|d3" }, + { "type": "deleteContainerCommand", "datanode": "d4|d5|d6" } + ] + }, + { "description": "Quasi-Closed stuck two origins not over replicated with maintenance", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "IN_MAINTENANCE" }, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "IN_MAINTENANCE"} + ], "expectation": { 
"underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, "checkCommands": [], "commands": [] + }, + { "description": "Quasi-Closed stuck two origins not over replicated with decommission", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "DECOMMISSIONED" }, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "DECOMMISSIONED"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [] + }, + { "description": "Quasi-Closed stuck two origins over replicated with maintenance", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "IN_MAINTENANCE" }, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d7", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "IN_MAINTENANCE"} + ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [ + { "type": "deleteContainerCommand", "datanode": "d1|d2|d3" } + ] + }, + { "description": "Quasi-Closed stuck two origins over replicated with stale", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10, + "replicas": [ + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "healthState": "STALE" }, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"}, + { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "healthState": "STALE" } 
+ ], + "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 }, + "checkCommands": [], + "commands": [] } -] \ No newline at end of file +] diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java index 44fd4968995..265b23c25d3 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java @@ -46,16 +46,16 @@ public void execute(ScmClient scmClient) throws IOException { // Output data list if (execReturn) { System.out.println("SCM is in safe mode."); - if (verbose) { - for (Map.Entry> entry : - scmClient.getSafeModeRuleStatuses().entrySet()) { - Pair value = entry.getValue(); - System.out.printf("validated:%s, %s, %s%n", - value.getLeft(), entry.getKey(), value.getRight()); - } - } } else { System.out.println("SCM is out of safe mode."); } + if (verbose) { + for (Map.Entry> entry : + scmClient.getSafeModeRuleStatuses().entrySet()) { + Pair value = entry.getValue(); + System.out.printf("validated:%s, %s, %s%n", + value.getLeft(), entry.getKey(), value.getRight()); + } + } } } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoSubcommand.java index 14d6a0e84d1..e1c54add12f 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoSubcommand.java @@ -216,14 +216,13 @@ private static String buildDatanodeDetails(DatanodeDetails details) { } private static String buildReplicaDetails(ContainerReplicaInfo replica) { - StringBuilder sb = new StringBuilder(); - sb.append("State: " + replica.getState() + ";"); + StringBuilder sb = new StringBuilder() + .append("State: ").append(replica.getState()).append(";"); if (replica.getReplicaIndex() != -1) { - sb.append(" ReplicaIndex: " + replica.getReplicaIndex() + ";"); + sb.append(" ReplicaIndex: ").append(replica.getReplicaIndex()).append(";"); } - sb.append(" Origin: " + replica.getPlaceOfBirth().toString() + ";"); - sb.append(" Location: " - + buildDatanodeDetails(replica.getDatanodeDetails())); + sb.append(" Origin: ").append(replica.getPlaceOfBirth().toString()).append(";") + .append(" Location: ").append(buildDatanodeDetails(replica.getDatanodeDetails())); return sb.toString(); } diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestReportSubCommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestReportSubCommand.java index 48269253fb7..f74850e6c4a 100644 --- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestReportSubCommand.java +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestReportSubCommand.java @@ -177,7 +177,7 @@ private String containerList(int start, int end) { if (i != start) { sb.append(", "); } - sb.append("#" + i); + sb.append("#").append(i); } return sb.toString(); } diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java 
b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java index d810df6722b..66ced0132a4 100644 --- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java @@ -264,14 +264,14 @@ private List getNodeDetails(int n) { private Map> getContainersOnDecomNodes() { Map> containerMap = new HashMap<>(); List underReplicated = new ArrayList<>(); - underReplicated.add(new ContainerID(1L)); - underReplicated.add(new ContainerID(2L)); - underReplicated.add(new ContainerID(3L)); + underReplicated.add(ContainerID.valueOf(1L)); + underReplicated.add(ContainerID.valueOf(2L)); + underReplicated.add(ContainerID.valueOf(3L)); containerMap.put("UnderReplicated", underReplicated); List unclosed = new ArrayList<>(); - unclosed.add(new ContainerID(10L)); - unclosed.add(new ContainerID(11L)); - unclosed.add(new ContainerID(12L)); + unclosed.add(ContainerID.valueOf(10L)); + unclosed.add(ContainerID.valueOf(11L)); + unclosed.add(ContainerID.valueOf(12L)); containerMap.put("UnClosed", unclosed); return containerMap; } diff --git a/hadoop-ozone/cli-shell/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneAddress.java b/hadoop-ozone/cli-shell/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneAddress.java index 28d25bbb2c9..30e4192ce6a 100644 --- a/hadoop-ozone/cli-shell/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneAddress.java +++ b/hadoop-ozone/cli-shell/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneAddress.java @@ -52,7 +52,7 @@ public void checkRootUrlType(String prefix) throws OzoneClientException { address = new OzoneAddress(""); address.ensureRootAddress(); - address = new OzoneAddress(prefix + ""); + address = new OzoneAddress(prefix); address.ensureRootAddress(); } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 7e80766c7fe..a6d849a127b 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -112,9 +112,6 @@ private OMConfigKeys() { public static final String OZONE_OM_VOLUME_LISTALL_ALLOWED = "ozone.om.volume.listall.allowed"; public static final boolean OZONE_OM_VOLUME_LISTALL_ALLOWED_DEFAULT = true; - public static final String OZONE_OM_USER_MAX_VOLUME = - "ozone.om.user.max.volume"; - public static final int OZONE_OM_USER_MAX_VOLUME_DEFAULT = 1024; public static final String OZONE_KEY_DELETING_LIMIT_PER_TASK = "ozone.key.deleting.limit.per.task"; diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java index 9abbaafe5fb..9a24428f5c9 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.om; +import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.ConfigTag; @@ -54,6 +55,18 @@ public class OmConfig extends ReconfigurableConfig { ) private long maxListSize; + @Config( + key = "user.max.volume", + defaultValue = "1024", + description = "The 
maximum number of volumes a user can have on a cluster.Increasing or " + + "decreasing this number has no real impact on ozone cluster. This is " + + "defined only for operational purposes. Only an administrator can create a " + + "volume, once a volume is created there are no restrictions on the number " + + "of buckets or keys inside each bucket a user can create.", + tags = { ConfigTag.OM, ConfigTag.MANAGEMENT } + ) + private int maxUserVolumeCount; + public boolean isFileSystemPathEnabled() { return fileSystemPathEnabled; } @@ -71,11 +84,23 @@ public void setMaxListSize(long newValue) { validate(); } + public int getMaxUserVolumeCount() { + return maxUserVolumeCount; + } + + public void setMaxUserVolumeCount(int newValue) { + maxUserVolumeCount = newValue; + validate(); + } + @PostConstruct public void validate() { if (maxListSize <= 0) { maxListSize = Defaults.SERVER_LIST_MAX_SIZE; } + + Preconditions.checkArgument(this.maxUserVolumeCount > 0, + Keys.USER_MAX_VOLUME + " value should be greater than zero"); } public OmConfig copy() { @@ -87,6 +112,7 @@ public OmConfig copy() { public void setFrom(OmConfig other) { fileSystemPathEnabled = other.fileSystemPathEnabled; maxListSize = other.maxListSize; + maxUserVolumeCount = other.maxUserVolumeCount; } /** @@ -95,6 +121,7 @@ public void setFrom(OmConfig other) { public static final class Keys { public static final String ENABLE_FILESYSTEM_PATHS = "ozone.om.enable.filesystem.paths"; public static final String SERVER_LIST_MAX_SIZE = "ozone.om.server.list.max.size"; + public static final String USER_MAX_VOLUME = "ozone.om.user.max.volume"; } /** diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/TestOmConfig.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/TestOmConfig.java index 29343ea22af..f02f4bc57d0 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/TestOmConfig.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/TestOmConfig.java @@ -19,6 +19,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import org.apache.hadoop.hdds.conf.MutableConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -52,6 +53,14 @@ void overridesInvalidListSize(long invalidValue) { .isEqualTo(OmConfig.Defaults.SERVER_LIST_MAX_SIZE); } + @Test + void throwsOnInvalidMaxUserVolume() { + MutableConfigurationSource conf = new OzoneConfiguration(); + conf.setInt(OmConfig.Keys.USER_MAX_VOLUME, 0); + + assertThrows(IllegalArgumentException.class, () -> conf.getObject(OmConfig.class)); + } + @Test void testCopy() { MutableConfigurationSource conf = new OzoneConfiguration(); @@ -69,6 +78,7 @@ void testSetFrom() { OmConfig updated = conf.getObject(OmConfig.class); updated.setFileSystemPathEnabled(!updated.isFileSystemPathEnabled()); updated.setMaxListSize(updated.getMaxListSize() + 1); + updated.setMaxUserVolumeCount(updated.getMaxUserVolumeCount() + 1); subject.setFrom(updated); @@ -78,6 +88,7 @@ void testSetFrom() { private static void assertConfigEquals(OmConfig expected, OmConfig actual) { assertEquals(expected.getMaxListSize(), actual.getMaxListSize()); assertEquals(expected.isFileSystemPathEnabled(), actual.isFileSystemPathEnabled()); + assertEquals(expected.getMaxUserVolumeCount(), actual.getMaxUserVolumeCount()); } } diff --git a/hadoop-ozone/dev-support/checks/bats.sh b/hadoop-ozone/dev-support/checks/bats.sh index 
3dec6052a92..49d5cb0912a 100755 --- a/hadoop-ozone/dev-support/checks/bats.sh +++ b/hadoop-ozone/dev-support/checks/bats.sh @@ -34,6 +34,7 @@ find * \( \ -path '*/src/test/shell/*' -name '*.bats' \ -or -path dev-support/ci/selective_ci_checks.bats \ -or -path dev-support/ci/pr_title_check.bats \ + -or -path dev-support/ci/find_test_class_project.bats \ \) -print0 \ | xargs -0 -n1 bats --formatter tap \ | tee -a "${REPORT_DIR}/output.log" diff --git a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot index 55e4ed841d4..5af45d97c55 100644 --- a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot +++ b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot @@ -42,15 +42,15 @@ Testing audit parser ${logdir} = Get Environment Variable OZONE_LOG_DIR /var/log/ozone ${logfile} = Execute ls -t "${logdir}" | grep om-audit | head -1 - Execute ozone auditparser "${auditworkdir}/audit.db" load "${logdir}/${logfile}" - ${result} = Execute ozone auditparser "${auditworkdir}/audit.db" template top5cmds + Execute ozone debug auditparser "${auditworkdir}/audit.db" load "${logdir}/${logfile}" + ${result} = Execute ozone debug auditparser "${auditworkdir}/audit.db" template top5cmds Should Contain ${result} ALLOCATE_KEY - ${result} = Execute ozone auditparser "${auditworkdir}/audit.db" template top5users + ${result} = Execute ozone debug auditparser "${auditworkdir}/audit.db" template top5users Run Keyword If '${SECURITY_ENABLED}' == 'true' Set username Should Contain ${result} ${user} - ${result} = Execute ozone auditparser "${auditworkdir}/audit.db" query "select count(*) from audit where op='CREATE_VOLUME' and RESULT='SUCCESS'" + ${result} = Execute ozone debug auditparser "${auditworkdir}/audit.db" query "select count(*) from audit where op='CREATE_VOLUME' and RESULT='SUCCESS'" ${result} = Convert To Number ${result} Should be true ${result}>=1 - ${result} = Execute ozone auditparser "${auditworkdir}/audit.db" query "select count(*) from audit where op='CREATE_BUCKET' and RESULT='SUCCESS'" + ${result} = Execute ozone debug auditparser "${auditworkdir}/audit.db" query "select count(*) from audit where op='CREATE_BUCKET' and RESULT='SUCCESS'" ${result} = Convert To Number ${result} Should be true ${result}>=${buckets} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot index e0fcd50ac12..20689b7c0f5 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot @@ -27,8 +27,8 @@ ${TESTFILE} testfile ${CORRUPT_DATANODE} ozone_datanode_1.ozone_default *** Test Cases *** -Test ozone debug read-replicas with corrupt block replica - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with corrupt block replica + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot index e8385bb5938..42ae5dec7e9 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot @@ -14,7 +14,7 @@ # limitations under the 
License. *** Settings *** -Documentation Test read-replicas in case of one datanode is dead +Documentation Test checksums in case of one datanode is dead Library OperatingSystem Resource ../lib/os.robot Resource ozone-debug.robot @@ -26,8 +26,8 @@ ${BUCKET} cli-debug-bucket ${TESTFILE} testfile *** Test Cases *** -Test ozone debug read-replicas with one datanode DEAD - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with one datanode DEAD + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot index c7cc7aaf3aa..36cef5e6651 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot @@ -14,7 +14,7 @@ # limitations under the License. *** Settings *** -Documentation Test read-replicas in case of one datanode is stale +Documentation Test checksums in case of one datanode is stale Library OperatingSystem Resource ../lib/os.robot Resource ozone-debug.robot @@ -27,8 +27,8 @@ ${TESTFILE} testfile ${STALE_DATANODE} ozone_datanode_1.ozone_default *** Test Cases *** -Test ozone debug read-replicas with one datanode STALE - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with one datanode STALE + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot index 5b3638040a7..57227458cc1 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot @@ -44,13 +44,13 @@ Create EC key *** Test Cases *** 0 data block Create EC key 1000 0 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 1 1 data block Create EC key 1048576 1 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 ${sum_size} = Evaluate 1048576 * 3 @@ -58,7 +58,7 @@ Create EC key 2 data blocks Create EC key 1048576 2 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 4 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 @@ -66,7 +66,7 @@ Create EC key 3 data blocks Create EC key 1048576 3 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 5 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 @@ -74,7 +74,7 @@ Create EC key 3 data blocks and partial stripe Create EC key 1000000 4 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 ${sum_size_last_stripe} = 
Evaluate ((1000000 * 4) % 1048576) * 3 @@ -84,7 +84,7 @@ Create EC key 4 data blocks and partial stripe Create EC key 1000000 5 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 ${sum_size_last_stripe} = Evaluate 1048576 * 3 + ((1000000 * 5) % 1048576) @@ -94,7 +94,7 @@ Create EC key 6 data blocks Create EC key 1048576 6 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 Should Be Equal As Integers ${count_files} 11 diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot index 692f2791e20..52d48c25f77 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot @@ -39,13 +39,13 @@ Create EC key *** Test Cases *** 0 data block Create EC key 1048576 0 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 1 1 data block Create EC key 1048576 1 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 ${sum_size} = Evaluate 1048576 * 4 @@ -53,7 +53,7 @@ Create EC key 2 data blocks Create EC key 1048576 2 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 5 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 @@ -61,7 +61,7 @@ Create EC key 3 data blocks Create EC key 1048576 3 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 6 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 @@ -69,7 +69,7 @@ Create EC key 4 data blocks Create EC key 1048576 4 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 7 Should Be Equal As Integers ${count_files} 10 @@ -77,7 +77,7 @@ Create EC key 5 data blocks Create EC key 1048576 5 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 8 Should Be Equal As Integers ${count_files} 10 @@ -85,7 +85,7 @@ Create EC key 6 data blocks Create EC key 1048576 6 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 9 Should Be Equal As Integers ${count_files} 10 @@ -93,7 +93,7 @@ Create EC key 6 data blocks and partial stripe Create EC key 1000000 7 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 9 ${sum_size_last_stripe} = Evaluate ((1000000 * 
7) % 1048576) * 4 @@ -103,10 +103,10 @@ Create EC key 7 data blocks and partial stripe Create EC key 1000000 8 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 9 ${sum_size_last_stripe} = Evaluate 1048576 * 4 + ((1000000 * 8) % 1048576) Should Be Equal As Integers ${count_files} 19 Verify Healthy EC Replica ${directory} 1 ${sum_size} - Verify Healthy EC Replica ${directory} 2 ${sum_size_last_stripe} \ No newline at end of file + Verify Healthy EC Replica ${directory} 2 ${sum_size_last_stripe} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot index 4e013e2a64b..803ab19ade8 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot @@ -37,7 +37,7 @@ Write keys *** Test Cases *** Test ozone debug read-replicas - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot index fb3e0f41586..9bb77d00d6d 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot @@ -19,8 +19,8 @@ Library Collections Resource ../lib/os.robot *** Keywords *** -Execute read-replicas CLI tool - Execute ozone debug -Dozone.network.topology.aware.read=true read-replicas --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE} +Execute replicas verify checksums CLI tool + Execute ozone debug -Dozone.network.topology.aware.read=true replicas verify --checksums --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE} ${directory} = Execute ls -d ${TEMP_DIR}/${VOLUME}_${BUCKET}_${TESTFILE}_*/ | tail -n 1 Directory Should Exist ${directory} File Should Exist ${directory}/${TESTFILE}_manifest diff --git a/hadoop-ozone/dist/src/shell/ozone/ozone b/hadoop-ozone/dist/src/shell/ozone/ozone index d868f0217e4..08bbf1acfe6 100755 --- a/hadoop-ozone/dist/src/shell/ozone/ozone +++ b/hadoop-ozone/dist/src/shell/ozone/ozone @@ -36,7 +36,6 @@ function ozone_usage ozone_add_option "--jvmargs arguments" "append JVM options to any existing options defined in the OZONE_OPTS environment variable. 
Any defined in OZONE_CLIENT_OPTS will be append after these jvmargs" ozone_add_option "--validate (continue)" "validates if all jars as indicated in the corresponding OZONE_RUN_ARTIFACT_NAME classpath file are present, command execution shall continue post validation failure if 'continue' is passed" - ozone_add_subcommand "auditparser" client "runs audit parser tool" ozone_add_subcommand "classpath" client "prints the class path needed for running ozone commands" ozone_add_subcommand "datanode" daemon "run a HDDS datanode" ozone_add_subcommand "envvars" client "display computed Hadoop environment variables" @@ -83,10 +82,6 @@ function ozonecmd_case RATIS_OPTS="-Dorg.apache.ratis.thirdparty.io.netty.allocator.useCacheForAllThreads=false ${RATIS_OPTS}" case ${subcmd} in - auditparser) - OZONE_CLASSNAME=org.apache.hadoop.ozone.audit.parser.AuditParser - OZONE_RUN_ARTIFACT_NAME="ozone-tools" - ;; classpath) if [[ "$#" -gt 0 ]]; then OZONE_RUN_ARTIFACT_NAME="$1" @@ -309,7 +304,13 @@ if [[ $# = 0 ]]; then fi OZONE_SUBCMD=$1 -shift + +if [[ "$OZONE_SUBCMD" == "auditparser" ]]; then + echo "warning: 'ozone auditparser' is deprecated, use 'ozone debug auditparser' instead." + OZONE_SUBCMD="debug" +else + shift +fi if ozone_need_reexec ozone "${OZONE_SUBCMD}"; then diff --git a/hadoop-ozone/httpfsgateway/dev-support/findbugsExcludeFile.xml b/hadoop-ozone/httpfsgateway/dev-support/findbugsExcludeFile.xml index ea725446e42..40d78d0cd6c 100644 --- a/hadoop-ozone/httpfsgateway/dev-support/findbugsExcludeFile.xml +++ b/hadoop-ozone/httpfsgateway/dev-support/findbugsExcludeFile.xml @@ -15,24 +15,4 @@ limitations under the License. --> - - - - - - - - - - - - - - - - - - - - diff --git a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/fs/http/server/HttpFSServerWebApp.java b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/fs/http/server/HttpFSServerWebApp.java index 8f9d8f3d6e5..b5a1736b0ed 100644 --- a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/fs/http/server/HttpFSServerWebApp.java +++ b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/fs/http/server/HttpFSServerWebApp.java @@ -18,6 +18,7 @@ package org.apache.ozone.fs.http.server; import java.io.IOException; +import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdds.annotation.InterfaceAudience; @@ -36,7 +37,7 @@ * implementation that is wired in HttpFSServer's WAR * WEB-INF/web.xml. *
<p>
- * It provides acces to the server context via the singleton {@link #get}. + * It provides access to the server context via the singleton {@link #get}. *
<p>
* All the configuration is loaded from configuration properties prefixed * with httpfs.. @@ -56,8 +57,8 @@ public class HttpFSServerWebApp extends ServerWebApp { */ public static final String CONF_ADMIN_GROUP = "admin.group"; - private static HttpFSServerWebApp server; - private static HttpFSServerMetrics metrics; + private static final AtomicReference SERVER = new AtomicReference<>(); + private static final AtomicReference METRICS = new AtomicReference<>(); private String adminGroup; @@ -80,13 +81,12 @@ public HttpFSServerWebApp() throws IOException { */ @Override public void init() throws ServerException { - if (server != null) { + if (!SERVER.compareAndSet(null, this)) { throw new RuntimeException("HttpFSServer server already initialized"); } - server = this; super.init(); adminGroup = getConfig().get(getPrefixedName(CONF_ADMIN_GROUP), "admin"); - LOG.info("Connects to Namenode [{}]", + LOG.info("Connects to FileSystem [{}]", get().get(FileSystemAccess.class).getFileSystemConfiguration(). get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)); setMetrics(getConfig()); @@ -97,7 +97,8 @@ public void init() throws ServerException { */ @Override public void destroy() { - server = null; + SERVER.set(null); + HttpFSServerMetrics metrics = METRICS.getAndSet(null); if (metrics != null) { metrics.shutdown(); } @@ -106,11 +107,11 @@ public void destroy() { private static void setMetrics(Configuration config) { LOG.info("Initializing HttpFSServerMetrics"); - metrics = HttpFSServerMetrics.create(config, "HttpFSServer"); + METRICS.updateAndGet(prev -> prev != null ? prev : HttpFSServerMetrics.create(config, "HttpFSServer")); JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(); pauseMonitor.init(config); pauseMonitor.start(); - metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); + METRICS.get().getJvmMetrics().setPauseMonitor(pauseMonitor); FSOperations.setBufferSize(config); DefaultMetricsSystem.initialize("HttpFSServer"); } @@ -121,7 +122,7 @@ private static void setMetrics(Configuration config) { * @return the HttpFSServer server singleton. */ public static HttpFSServerWebApp get() { - return server; + return SERVER.get(); } /** @@ -129,7 +130,7 @@ public static HttpFSServerWebApp get() { * @return the HttpFSServerMetrics singleton. 
*/ public static HttpFSServerMetrics getMetrics() { - return metrics; + return METRICS.get(); } /** diff --git a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/service/instrumentation/InstrumentationService.java b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/service/instrumentation/InstrumentationService.java index 8c1d746914d..69652ef8f9e 100644 --- a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/service/instrumentation/InstrumentationService.java +++ b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/service/instrumentation/InstrumentationService.java @@ -129,43 +129,23 @@ private T getToAdd(String group, Class klass, Lock lock, Map> map) { - boolean locked = false; + lock.lock(); try { - Map groupMap = map.get(group); - if (groupMap == null) { - lock.lock(); - locked = true; - groupMap = map.get(group); - if (groupMap == null) { - groupMap = new ConcurrentHashMap(); - map.put(group, groupMap); - } - } - T element = groupMap.get(name); - if (element == null) { - if (!locked) { - lock.lock(); - locked = true; - } - element = groupMap.get(name); - if (element == null) { - try { - if (klass == Timer.class) { - element = (T) new Timer(timersSize); - } else { - element = klass.newInstance(); + return map + .computeIfAbsent(group, k -> new ConcurrentHashMap<>()) + .computeIfAbsent(name, k -> { + try { + if (klass == Timer.class) { + return (T) new Timer(timersSize); + } else { + return klass.newInstance(); + } + } catch (Exception ex) { + throw new RuntimeException(ex); } - } catch (Exception ex) { - throw new RuntimeException(ex); - } - groupMap.put(name, element); - } - } - return element; + }); } finally { - if (locked) { - lock.unlock(); - } + lock.unlock(); } } diff --git a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/servlet/ServerWebApp.java b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/servlet/ServerWebApp.java index 5378ab94168..7d59b42e0b3 100644 --- a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/servlet/ServerWebApp.java +++ b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/servlet/ServerWebApp.java @@ -191,7 +191,7 @@ public InetSocketAddress getAuthority() throws ServerException { if (authority == null) { authority = resolveAuthority(); } + return authority; } - return authority; } } diff --git a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/wsrs/Param.java b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/wsrs/Param.java index b9147b03abb..5fecc16647b 100644 --- a/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/wsrs/Param.java +++ b/hadoop-ozone/httpfsgateway/src/main/java/org/apache/ozone/lib/wsrs/Param.java @@ -18,6 +18,7 @@ package org.apache.ozone.lib.wsrs; import java.text.MessageFormat; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.annotation.InterfaceAudience; /** @@ -42,7 +43,7 @@ public String getName() { public T parseParam(String str) { try { - value = (str != null && !str.trim().isEmpty()) ? parse(str) : value; + value = StringUtils.isNotBlank(str) ? 
parse(str) : value; } catch (Exception ex) { throw new IllegalArgumentException( MessageFormat.format("Parameter [{0}], invalid value [{1}], " + diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java index 866393ec0db..167559e324e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java @@ -22,8 +22,6 @@ import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_ROOT; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; @@ -32,6 +30,8 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD; import static org.apache.hadoop.ozone.om.OmUpgradeConfig.ConfigStrings.OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isStarting; import static org.apache.ozone.test.LambdaTestUtils.await; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 70e7fb5e5bd..23c7bf3930e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -454,10 +454,10 @@ public void testBlockDeletingThrottling() throws Exception { GenericTestUtils.waitFor(() -> { NodeManager nodeManager = cluster.getStorageContainerManager() .getScmNodeManager(); - List commands = nodeManager.processHeartbeat( + List> commands = nodeManager.processHeartbeat( nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0)); if (commands != null) { - for (SCMCommand cmd : commands) { + for (SCMCommand cmd : commands) { if (cmd.getType() == SCMCommandProto.Type.deleteBlocksCommand) { List deletedTXs = ((DeleteBlocksCommand) cmd).blocksTobeDeleted(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java index 8ae2f59ebb4..f050edf935e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java @@ -39,6 +39,8 @@ import 
org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; @@ -134,9 +136,6 @@ public void testAllocateContainerWithDifferentOwner() throws IOException { assertNotNull(info); String newContainerOwner = "OZONE_NEW"; - ContainerWithPipeline container2 = scm.getClientProtocolServer() - .allocateContainer(SCMTestUtils.getReplicationType(conf), - SCMTestUtils.getReplicationFactor(conf), newContainerOwner); ContainerInfo info2 = containerManager .getMatchingContainer(OzoneConsts.GB * 3, newContainerOwner, container1.getPipeline()); @@ -267,13 +266,15 @@ public void testGetMatchingContainerMultipleThreads() } } + void assertContainerCount(LifeCycleState state, int expected) { + final int computed = containerStateManager.getContainerCount(state); + assertEquals(expected, computed); + } + @Test public void testUpdateContainerState() throws IOException, - InvalidStateTransitionException, TimeoutException { - Set containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.OPEN); - int containers = containerList == null ? 0 : containerList.size(); - assertEquals(0, containers); + InvalidStateTransitionException { + assertContainerCount(LifeCycleState.OPEN, 0); // Allocate container1 and update its state from // OPEN -> CLOSING -> CLOSED -> DELETING -> DELETED @@ -281,37 +282,20 @@ public void testUpdateContainerState() throws IOException, .allocateContainer( SCMTestUtils.getReplicationType(conf), SCMTestUtils.getReplicationFactor(conf), OzoneConsts.OZONE); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.OPEN); - assertEquals(1, containerList.size()); + final ContainerID id1 = container1.getContainerInfo().containerID(); + assertContainerCount(LifeCycleState.OPEN, 1); - containerManager - .updateContainerState(container1.getContainerInfo().containerID(), - HddsProtos.LifeCycleEvent.FINALIZE); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.CLOSING); - assertEquals(1, containerList.size()); + containerManager.updateContainerState(id1, LifeCycleEvent.FINALIZE); + assertContainerCount(LifeCycleState.CLOSING, 1); - containerManager - .updateContainerState(container1.getContainerInfo().containerID(), - HddsProtos.LifeCycleEvent.CLOSE); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.CLOSED); - assertEquals(1, containerList.size()); + containerManager.updateContainerState(id1, LifeCycleEvent.CLOSE); + assertContainerCount(LifeCycleState.CLOSED, 1); - containerManager - .updateContainerState(container1.getContainerInfo().containerID(), - HddsProtos.LifeCycleEvent.DELETE); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETING); - assertEquals(1, containerList.size()); + containerManager.updateContainerState(id1, LifeCycleEvent.DELETE); + assertContainerCount(LifeCycleState.DELETING, 1); - containerManager - .updateContainerState(container1.getContainerInfo().containerID(), - HddsProtos.LifeCycleEvent.CLEANUP); - containerList = containerStateManager - 
.getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerList.size()); + containerManager.updateContainerState(id1, LifeCycleEvent.CLEANUP); + assertContainerCount(LifeCycleState.DELETED, 1); // Allocate container1 and update its state from // OPEN -> CLOSING -> CLOSED @@ -328,9 +312,7 @@ public void testUpdateContainerState() throws IOException, containerManager .updateContainerState(container1.getContainerInfo().containerID(), HddsProtos.LifeCycleEvent.CLOSE); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.CLOSED); - assertEquals(1, containerList.size()); + assertContainerCount(LifeCycleState.CLOSED, 1); } @@ -367,43 +349,43 @@ public void testReplicaMap() throws Exception { .setContainerState(ContainerReplicaProto.State.OPEN) .setDatanodeDetails(dn2) .build(); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica(replicaOne); + containerStateManager.updateContainerReplica(replicaTwo); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(2, replicaSet.size()); assertThat(replicaSet).contains(replicaOne); assertThat(replicaSet).contains(replicaTwo); // Test 3: Remove one replica node and then test - containerStateManager.removeContainerReplica(id, replicaOne); + containerStateManager.removeContainerReplica(replicaOne); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(1, replicaSet.size()); assertThat(replicaSet).doesNotContain(replicaOne); assertThat(replicaSet).contains(replicaTwo); // Test 3: Remove second replica node and then test - containerStateManager.removeContainerReplica(id, replicaTwo); + containerStateManager.removeContainerReplica(replicaTwo); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(0, replicaSet.size()); assertThat(replicaSet).doesNotContain(replicaOne); assertThat(replicaSet).doesNotContain(replicaTwo); // Test 4: Re-insert dn1 - containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(replicaOne); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(1, replicaSet.size()); assertThat(replicaSet).contains(replicaOne); assertThat(replicaSet).doesNotContain(replicaTwo); // Re-insert dn2 - containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica(replicaTwo); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(2, replicaSet.size()); assertThat(replicaSet).contains(replicaOne); assertThat(replicaSet).contains(replicaTwo); // Re-insert dn1 - containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(replicaOne); replicaSet = containerStateManager.getContainerReplicas(id); assertEquals(2, replicaSet.size()); assertThat(replicaSet).contains(replicaOne); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestScmApplyTransactionFailure.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestScmApplyTransactionFailure.java index 84c31f088d0..2bb67e9a884 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestScmApplyTransactionFailure.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestScmApplyTransactionFailure.java @@ -80,7 +80,7 @@ public void 
testAddContainerToClosedPipeline() throws Exception { InvalidPipelineStateException.class); assertThrows(ContainerNotFoundException.class, () -> containerManager.getContainer( - new ContainerID(containerInfo.getContainerID()))); + ContainerID.valueOf(containerInfo.getContainerID()))); // verify that SCMStateMachine is still functioning after the rejected // transaction. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index e553d32eed1..d79c3126628 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -268,7 +268,7 @@ public void testOrphanBlock() throws Exception { Token orphanContainerToken = containerTokenGenerator.generateToken( - ANY_USER, new ContainerID(orphanContainerID)); + ANY_USER, ContainerID.valueOf(orphanContainerID)); // Close the container by closing the pipeline scm.getPipelineManager().closePipeline(orphanPipeline, false); @@ -689,7 +689,7 @@ private void testECReconstructionCoordinator(List missingIndexes, OzoneKeyDetails key = bucket.getKey(keyString); long conID = key.getOzoneKeyLocations().get(0).getContainerID(); Token cToken = containerTokenGenerator - .generateToken(ANY_USER, new ContainerID(conID)); + .generateToken(ANY_USER, ContainerID.valueOf(conID)); //Close the container first. closeContainer(conID); @@ -876,7 +876,7 @@ public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure() OzoneKeyDetails key = bucket.getKey(keyString); long conID = key.getOzoneKeyLocations().get(0).getContainerID(); Token cToken = - containerTokenGenerator.generateToken(ANY_USER, new ContainerID(conID)); + containerTokenGenerator.generateToken(ANY_USER, ContainerID.valueOf(conID)); closeContainer(conID); Pipeline containerPipeline = scm.getPipelineManager().getPipeline( @@ -1050,7 +1050,7 @@ public static void prepareData(int[][] ranges) throws Exception { blockTokenGenerator = new OzoneBlockTokenSecretManager( tokenLifetime, secretKeyClient); containerToken = containerTokenGenerator - .generateToken(ANY_USER, new ContainerID(containerID)); + .generateToken(ANY_USER, ContainerID.valueOf(containerID)); } public static void stopCluster() throws IOException { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index 368d3df1c7f..9fa8b799593 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -17,8 +17,8 @@ package org.apache.hadoop.ozone; -import com.amazonaws.services.s3.AmazonS3; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.concurrent.TimeoutException; @@ -34,12 +34,10 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.recon.ReconServer; -import org.apache.hadoop.ozone.s3.Gateway; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.util.ExitUtils; import 
org.apache.ratis.util.function.CheckedFunction; -import software.amazon.awssdk.services.s3.S3Client; /** * Interface used for MiniOzoneClusters. @@ -147,13 +145,6 @@ void waitForPipelineTobeReady(HddsProtos.ReplicationFactor factor, */ ReconServer getReconServer(); - /** - * Returns a {@link Gateway} instance. - * - * @return {@link Gateway} instance if it is initialized, otherwise null. - */ - Gateway getS3G(); - /** * Returns an {@link OzoneClient} to access the {@link MiniOzoneCluster}. * The caller is responsible for closing the client after use. @@ -162,16 +153,6 @@ void waitForPipelineTobeReady(HddsProtos.ReplicationFactor factor, */ OzoneClient newClient() throws IOException; - /** - * Returns an {@link AmazonS3} to use AWS SDK V1 to access the {@link MiniOzoneCluster}. - */ - AmazonS3 newS3Client(); - - /** - * Returns an {@link S3Client} to use AWS SDK V2 to access the {@link MiniOzoneCluster}. - */ - S3Client newS3ClientV2() throws Exception; - /** * Returns StorageContainerLocationClient to communicate with * {@link StorageContainerManager} associated with the MiniOzoneCluster. @@ -237,21 +218,6 @@ void restartHddsDatanode(DatanodeDetails dn, boolean waitForDatanode) */ void stopRecon(); - /** - * Start S3G. - */ - void startS3G(); - - /** - * Restart S3G. - */ - void restartS3G(); - - /** - * Stop S3G. - */ - void stopS3G(); - /** * Shutdown the MiniOzoneCluster and delete the storage dirs. */ @@ -307,13 +273,13 @@ abstract class Builder { protected String omId = UUID.randomUUID().toString(); protected boolean includeRecon = false; - protected boolean includeS3G = false; protected int numOfDatanodes = 3; protected boolean startDataNodes = true; protected CertificateClient certClient; protected SecretKeyClient secretKeyClient; protected DatanodeFactory dnFactory = UniformDatanodesFactory.newBuilder().build(); + private final List services = new ArrayList<>(); protected Builder(OzoneConfiguration conf) { this.conf = conf; @@ -390,11 +356,15 @@ public Builder includeRecon(boolean include) { return this; } - public Builder includeS3G(boolean include) { - this.includeS3G = include; + public Builder addService(Service service) { + services.add(service); return this; } + public List getServices() { + return services; + } + /** * Constructs and returns MiniOzoneCluster. * @@ -409,4 +379,10 @@ public Builder includeS3G(boolean include) { interface DatanodeFactory extends CheckedFunction { // marker } + + /** Service to manage as part of the mini cluster. 
*/ + interface Service { + void start(OzoneConfiguration conf) throws Exception; + void stop() throws Exception; + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 4e26e3d580f..4a154ef2763 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -23,29 +23,15 @@ import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DATANODE_ADDRESS_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_HTTP_ADDRESS_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD; -import static org.apache.hadoop.hdds.server.http.HttpConfig.getHttpPolicy; -import static org.apache.hadoop.hdds.server.http.HttpServer2.HTTPS_SCHEME; -import static org.apache.hadoop.hdds.server.http.HttpServer2.HTTP_SCHEME; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DB_DIR; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_OM_SNAPSHOT_DB_DIR; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_DB_DIR; -import static org.apache.hadoop.ozone.s3.S3GatewayConfigKeys.OZONE_S3G_HTTPS_ADDRESS_KEY; -import static org.apache.hadoop.ozone.s3.S3GatewayConfigKeys.OZONE_S3G_HTTP_ADDRESS_KEY; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.anyHostWithFreePort; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.localhostWithFreePort; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import java.io.File; import java.io.IOException; -import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -82,7 +68,6 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.server.http.HttpConfig; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.utils.db.CodecBuffer; import org.apache.hadoop.hdds.utils.db.CodecTestUtil; @@ -98,19 +83,11 @@ import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.recon.ConfigurationProvider; import org.apache.hadoop.ozone.recon.ReconServer; -import org.apache.hadoop.ozone.s3.Gateway; -import org.apache.hadoop.ozone.s3.OzoneClientCache; -import org.apache.hadoop.ozone.s3.OzoneConfigurationHolder; -import org.apache.hadoop.ozone.s3.S3GatewayConfigKeys; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.ozone.recon.schema.ReconSqlDbConfig; import org.apache.ozone.test.GenericTestUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import 
software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; /** * MiniOzoneCluster creates a complete in-process Ozone cluster suitable for @@ -135,7 +112,7 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster { private OzoneManager ozoneManager; private final List hddsDatanodes; private ReconServer reconServer; - private Gateway s3g; + private final List services; // Timeout for the cluster to be ready private int waitForClusterToBeReadyTimeout = 120000; // 2 min @@ -151,15 +128,14 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, OzoneManager ozoneManager, StorageContainerManager scm, List hddsDatanodes, - ReconServer reconServer, - Gateway s3g) { + ReconServer reconServer, List services) { this.conf = conf; this.ozoneManager = ozoneManager; this.scm = scm; this.hddsDatanodes = hddsDatanodes; this.reconServer = reconServer; this.scmConfigurator = scmConfigurator; - this.s3g = s3g; + this.services = services; } /** @@ -169,11 +145,12 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, * OzoneManagers and StorageContainerManagers. */ MiniOzoneClusterImpl(OzoneConfiguration conf, SCMConfigurator scmConfigurator, - List hddsDatanodes, ReconServer reconServer) { + List hddsDatanodes, ReconServer reconServer, List services) { this.scmConfigurator = scmConfigurator; this.conf = conf; this.hddsDatanodes = hddsDatanodes; this.reconServer = reconServer; + this.services = services; } public SCMConfigurator getSCMConfigurator() { @@ -283,11 +260,6 @@ public ReconServer getReconServer() { return this.reconServer; } - @Override - public Gateway getS3G() { - return this.s3g; - } - @Override public int getHddsDatanodeIndex(DatanodeDetails dn) throws IOException { for (HddsDatanodeService service : hddsDatanodes) { @@ -306,91 +278,6 @@ public OzoneClient newClient() throws IOException { return client; } - @Override - public AmazonS3 newS3Client() { - // TODO: Parameterize tests between Virtual host style and Path style - return createS3Client(true); - } - - @Override - public S3Client newS3ClientV2() throws Exception { - return createS3ClientV2(true); - } - - public AmazonS3 createS3Client(boolean enablePathStyle) { - final String accessKey = "user"; - final String secretKey = "password"; - final Regions region = Regions.DEFAULT_REGION; - - final String protocol; - final HttpConfig.Policy webPolicy = getHttpPolicy(conf); - String host; - - if (webPolicy.isHttpsEnabled()) { - // TODO: Currently HTTPS is disabled in the test, we can add HTTPS - // integration in the future - protocol = HTTPS_SCHEME; - host = conf.get(OZONE_S3G_HTTPS_ADDRESS_KEY); - } else { - protocol = HTTP_SCHEME; - host = conf.get(OZONE_S3G_HTTP_ADDRESS_KEY); - } - - String endpoint = protocol + "://" + host; - - AWSCredentialsProvider credentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials(accessKey, secretKey) - ); - - - ClientConfiguration clientConfiguration = new ClientConfiguration(); - LOG.info("S3 Endpoint is {}", endpoint); - - return AmazonS3ClientBuilder.standard() - .withPathStyleAccessEnabled(enablePathStyle) - .withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration( - endpoint, region.getName() - ) - ) - .withClientConfiguration(clientConfiguration) - .withCredentials(credentials) - .build(); - } - - public S3Client createS3ClientV2(boolean enablePathStyle) throws Exception { - final String accessKey = "user"; - final String secretKey = "password"; - final 
Region region = Region.US_EAST_1; - - final String protocol; - final HttpConfig.Policy webPolicy = getHttpPolicy(conf); - String host; - - if (webPolicy.isHttpsEnabled()) { - // TODO: Currently HTTPS is disabled in the test, we can add HTTPS - // integration in the future - protocol = HTTPS_SCHEME; - host = conf.get(OZONE_S3G_HTTPS_ADDRESS_KEY); - } else { - protocol = HTTP_SCHEME; - host = conf.get(OZONE_S3G_HTTP_ADDRESS_KEY); - } - - String endpoint = protocol + "://" + host; - - LOG.info("S3 Endpoint is {}", endpoint); - - AwsBasicCredentials credentials = AwsBasicCredentials.create(accessKey, secretKey); - - return S3Client.builder() - .region(region) - .endpointOverride(new URI(endpoint)) - .credentialsProvider(StaticCredentialsProvider.create(credentials)) - .forcePathStyle(enablePathStyle) - .build(); - } - protected OzoneClient createClient() throws IOException { return OzoneClientFactory.getRpcClient(conf); } @@ -524,7 +411,7 @@ public void stop() { stopDatanodes(hddsDatanodes); stopSCM(scm); stopRecon(reconServer); - stopS3G(s3g); + stopServices(services); } private void startHddsDatanode(HddsDatanodeService datanode) { @@ -564,21 +451,10 @@ public void stopRecon() { stopRecon(reconServer); } - @Override - public void startS3G() { - s3g = new Gateway(); - s3g.execute(NO_ARGS); - } - - @Override - public void restartS3G() { - stopS3G(s3g); - startS3G(); - } - - @Override - public void stopS3G() { - stopS3G(s3g); + public void startServices() throws Exception { + for (Service service : services) { + service.start(getConf()); + } } private CertificateClient getCAClient() { @@ -635,16 +511,18 @@ private static void stopRecon(ReconServer reconServer) { } } - private static void stopS3G(Gateway s3g) { - try { - if (s3g != null) { - LOG.info("Stopping S3G"); - // TODO (HDDS-11539): Remove this workaround once the @PreDestroy issue is fixed - OzoneClientCache.closeClient(); - s3g.stop(); + private static void stopServices(List services) { + // stop in reverse order + List reverse = new ArrayList<>(services); + Collections.reverse(reverse); + + for (Service service : reverse) { + try { + service.stop(); + LOG.info("Stopped {}", service); + } catch (Exception e) { + LOG.error("Error stopping {}", service, e); } - } catch (Exception e) { - LOG.error("Exception while shutting down S3 Gateway.", e); } } @@ -671,23 +549,22 @@ public MiniOzoneCluster build() throws IOException { OzoneManager om = null; ReconServer reconServer = null; List hddsDatanodes = Collections.emptyList(); - Gateway s3g = null; try { scm = createAndStartSingleSCM(); om = createAndStartSingleOM(); - s3g = createS3G(); reconServer = createRecon(); hddsDatanodes = createHddsDatanodes(); MiniOzoneClusterImpl cluster = new MiniOzoneClusterImpl(conf, scmConfigurator, om, scm, - hddsDatanodes, reconServer, s3g); + hddsDatanodes, reconServer, getServices()); cluster.setCAClient(certClient); cluster.setSecretKeyClient(secretKeyClient); if (startDataNodes) { cluster.startHddsDatanodes(); } + cluster.startServices(); prepareForNextBuild(); return cluster; @@ -696,9 +573,7 @@ public MiniOzoneCluster build() throws IOException { if (includeRecon) { stopRecon(reconServer); } - if (includeS3G) { - stopS3G(s3g); - } + stopServices(getServices()); if (startDataNodes) { stopDatanodes(hddsDatanodes); } @@ -867,16 +742,6 @@ protected ReconServer createRecon() { return reconServer; } - protected Gateway createS3G() { - Gateway s3g = null; - if (includeS3G) { - configureS3G(); - s3g = new Gateway(); - s3g.execute(NO_ARGS); - } - return s3g; - 
} - /** * Creates HddsDatanodeService(s) instance. * @@ -941,15 +806,5 @@ protected void configureRecon() { ConfigurationProvider.setConfiguration(conf); } - - private void configureS3G() { - OzoneConfigurationHolder.resetConfiguration(); - - conf.set(S3GatewayConfigKeys.OZONE_S3G_HTTP_ADDRESS_KEY, localhostWithFreePort()); - conf.set(S3GatewayConfigKeys.OZONE_S3G_HTTPS_ADDRESS_KEY, localhostWithFreePort()); - - OzoneConfigurationHolder.setConfiguration(conf); - } - } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index 7499bb234ca..cdbabaac95e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone; +import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; @@ -86,7 +87,7 @@ public MiniOzoneHAClusterImpl( List hddsDatanodes, String clusterPath, ReconServer reconServer) { - super(conf, scmConfigurator, hddsDatanodes, reconServer); + super(conf, scmConfigurator, hddsDatanodes, reconServer, emptyList()); this.omhaService = omhaService; this.scmhaService = scmhaService; this.clusterMetaPath = clusterPath; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java index d7eb78ad161..1aed5b76d3d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java @@ -23,11 +23,11 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose; +import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.io.OutputStream; @@ -41,7 +41,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.client.ObjectStore; @@ -52,15 +51,12 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; -import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Flaky; import 
org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; /** * Tests for container report handling. */ -@Flaky("HDDS-12535") public class TestContainerReportHandling { private static final String VOLUME = "vol1"; private static final String BUCKET = "bucket1"; @@ -97,6 +93,9 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID()); waitForContainerClose(cluster, containerID.getId()); + // also wait till the container is closed in SCM + waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED); + // move the container to DELETING ContainerManager containerManager = cluster.getStorageContainerManager().getContainerManager(); containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE); @@ -111,14 +110,7 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor // restart a DN and wait for the container to get CLOSED. HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode()); cluster.restartHddsDatanode(dn.getDatanodeDetails(), false); - GenericTestUtils.waitFor(() -> { - try { - return containerManager.getContainer(containerID).getState() == HddsProtos.LifeCycleState.CLOSED; - } catch (ContainerNotFoundException e) { - fail(e); - } - return false; - }, 2000, 20000); + waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED); assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState()); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java index 9517fd9e459..357945a3fa5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java @@ -23,11 +23,11 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose; +import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.io.OutputStream; @@ -35,13 +35,14 @@ import java.nio.file.Paths; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; 
import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; @@ -53,15 +54,12 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; -import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; /** * Tests for container report handling with SCM High Availability. */ -@Flaky("HDDS-12535") public class TestContainerReportHandlingWithHA { private static final String VOLUME = "vol1"; private static final String BUCKET = "bucket1"; @@ -99,6 +97,8 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID()); waitForContainerClose(cluster, containerID.getId()); + waitForContainerStateInAllSCMs(cluster, containerID, HddsProtos.LifeCycleState.CLOSED); + // move the container to DELETING ContainerManager containerManager = cluster.getScmLeader().getContainerManager(); containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE); @@ -113,23 +113,8 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor // restart a DN and wait for the container to get CLOSED in all SCMs HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode()); cluster.restartHddsDatanode(dn.getDatanodeDetails(), false); - ContainerManager[] array = new ContainerManager[numSCM]; - for (int i = 0; i < numSCM; i++) { - array[i] = cluster.getStorageContainerManager(i).getContainerManager(); - } - GenericTestUtils.waitFor(() -> { - try { - for (ContainerManager manager : array) { - if (manager.getContainer(containerID).getState() != HddsProtos.LifeCycleState.CLOSED) { - return false; - } - } - return true; - } catch (ContainerNotFoundException e) { - fail(e); - } - return false; - }, 2000, 20000); + + waitForContainerStateInAllSCMs(cluster, containerID, HddsProtos.LifeCycleState.CLOSED); assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState()); } @@ -177,4 +162,12 @@ private void createTestData(OzoneClient client) throws IOException { } } + private static void waitForContainerStateInAllSCMs(MiniOzoneHAClusterImpl cluster, ContainerID containerID, + HddsProtos.LifeCycleState desiredState) + throws TimeoutException, InterruptedException { + for (StorageContainerManager scm : cluster.getStorageContainerManagersList()) { + waitForContainerStateInSCM(scm, containerID, desiredState); + } + } + } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java index 07138685862..c454facea07 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.security.MessageDigest; @@ -455,4 +456,20 @@ public 
static void setConfig(OzoneConfiguration conf, String key, String value) conf.set(key, value); } } + + public static void waitForContainerStateInSCM(StorageContainerManager scm, + ContainerID containerID, HddsProtos.LifeCycleState expectedState) + throws TimeoutException, InterruptedException { + ContainerManager containerManager = scm.getContainerManager(); + GenericTestUtils.waitFor(() -> { + try { + return containerManager.getContainer(containerID).getState() == expectedState; + } catch (ContainerNotFoundException e) { + LOG.error("Container {} not found while waiting for state {}", + containerID, expectedState, e); + fail("Container " + containerID + " not found while waiting for state " + expectedState + ": " + e); + return false; + } + }, 2000, 20000); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index e0b64c2f75b..cb03d450750 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -673,7 +673,7 @@ public void testContainerDeleteWithInvalidKeyCount() .setEmpty(true) .build(); // Update replica - containerStateManager.updateContainerReplica(containerId, replicaOne); + containerStateManager.updateContainerReplica(replicaOne); // Check replica updated with wrong keyCount scm.getContainerManager().getContainerReplicas( diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index e4d49af9e66..b9710b45455 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -146,7 +146,7 @@ protected void waitForScmToCloseContainer(long containerID) throws Exception { ContainerManager cm = cluster.getStorageContainerManager() .getContainerManager(); LambdaTestUtils.await(5000, 500, - () -> cm.getContainer(new ContainerID(containerID)).getState() + () -> cm.getContainer(ContainerID.valueOf(containerID)).getState() != HddsProtos.LifeCycleState.OPEN); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/multitenant/TestMultiTenantVolume.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/multitenant/TestMultiTenantVolume.java index 27d39b3d024..7d80fba7aa8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/multitenant/TestMultiTenantVolume.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/multitenant/TestMultiTenantVolume.java @@ -17,10 +17,10 @@ package org.apache.hadoop.ozone.om.multitenant; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_MULTITENANCY_ENABLED; import static 
org.apache.hadoop.ozone.om.OmUpgradeConfig.ConfigStrings.OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isStarting; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java index f099e88cbea..8c5a623e9c8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java @@ -23,8 +23,6 @@ import static org.apache.hadoop.ozone.OzoneAcl.AclScope.DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ENABLE_FILESYSTEM_PATHS; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DIFF_DISABLE_NATIVE_LIBS; @@ -46,6 +44,8 @@ import static org.apache.hadoop.ozone.snapshot.SnapshotDiffResponse.JobStatus.CANCELLED; import static org.apache.hadoop.ozone.snapshot.SnapshotDiffResponse.JobStatus.DONE; import static org.apache.hadoop.ozone.snapshot.SnapshotDiffResponse.JobStatus.IN_PROGRESS; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone; +import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isStarting; import static org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.COLUMN_FAMILIES_TO_TRACK_IN_DAG; import static org.apache.ozone.test.LambdaTestUtils.await; import static org.assertj.core.api.Assertions.assertThat; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3ClientFactory.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3ClientFactory.java new file mode 100644 index 00000000000..5f80a0907c5 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3ClientFactory.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.s3; + +import static org.apache.hadoop.hdds.server.http.HttpConfig.getHttpPolicy; +import static org.apache.hadoop.hdds.server.http.HttpServer2.HTTPS_SCHEME; +import static org.apache.hadoop.hdds.server.http.HttpServer2.HTTP_SCHEME; +import static org.apache.hadoop.ozone.s3.S3GatewayConfigKeys.OZONE_S3G_HTTPS_ADDRESS_KEY; +import static org.apache.hadoop.ozone.s3.S3GatewayConfigKeys.OZONE_S3G_HTTP_ADDRESS_KEY; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import java.net.URI; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.server.http.HttpConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; + +/** + * Factory class for creating S3 clients. + */ +public class S3ClientFactory { + private static final Logger LOG = LoggerFactory.getLogger(S3ClientFactory.class); + private final OzoneConfiguration conf; + + /** + * Constructor for S3ClientFactory. + * + * @param conf OzoneConfiguration + */ + public S3ClientFactory(OzoneConfiguration conf) { + this.conf = conf; + } + + /** + * Creates an AmazonS3 client (AWS SDK V1) with path style access enabled. + * + * @return AmazonS3 client + */ + public AmazonS3 createS3Client() { + return createS3Client(true); + } + + /** + * Creates an AmazonS3 client (AWS SDK V1). + * + * @param enablePathStyle whether to enable path style access + * @return AmazonS3 client + */ + public AmazonS3 createS3Client(boolean enablePathStyle) { + final String accessKey = "user"; + final String secretKey = "password"; + final Regions region = Regions.DEFAULT_REGION; + + final String protocol; + final HttpConfig.Policy webPolicy = getHttpPolicy(conf); + String host; + + if (webPolicy.isHttpsEnabled()) { + // TODO: Currently HTTPS is disabled in the test, we can add HTTPS + // integration in the future + protocol = HTTPS_SCHEME; + host = conf.get(OZONE_S3G_HTTPS_ADDRESS_KEY); + } else { + protocol = HTTP_SCHEME; + host = conf.get(OZONE_S3G_HTTP_ADDRESS_KEY); + } + + String endpoint = protocol + "://" + host; + + AWSCredentialsProvider credentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials(accessKey, secretKey)); + + ClientConfiguration clientConfiguration = new ClientConfiguration(); + LOG.info("S3 Endpoint is {}", endpoint); + + return AmazonS3ClientBuilder.standard() + .withPathStyleAccessEnabled(enablePathStyle) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration( + endpoint, region.getName())) + .withClientConfiguration(clientConfiguration) + .withCredentials(credentials) + .build(); + } + + /** + * Creates an S3Client (AWS SDK V2) with path style access enabled. + * + * @return S3Client + * @throws Exception if there is an error creating the client + */ + public S3Client createS3ClientV2() throws Exception { + return createS3ClientV2(true); + } + + /** + * Creates an S3Client (AWS SDK V2). 
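Before the V2 variant below, a sketch of how this factory is meant to be paired with the S3GatewayService introduced later in this patch, mirroring the SDK test base classes further down (imports as in those classes). The bucket name and the surrounding wiring are illustrative only.

OzoneConfiguration conf = new OzoneConfiguration();
S3GatewayService s3g = new S3GatewayService();
MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf)
    .addService(s3g)
    .setNumDatanodes(5)
    .build();
cluster.waitForClusterToBeReady();

// S3GatewayService.getConf() exposes the configuration that was populated with the
// gateway's HTTP/HTTPS addresses, which this factory uses to build the endpoint URL.
AmazonS3 s3 = new S3ClientFactory(s3g.getConf()).createS3Client();
s3.createBucket("sketch-bucket");  // hypothetical bucket name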
+ * + * @param enablePathStyle whether to enable path style access + * @return S3Client + * @throws Exception if there is an error creating the client + */ + public S3Client createS3ClientV2(boolean enablePathStyle) throws Exception { + final String accessKey = "user"; + final String secretKey = "password"; + final Region region = Region.US_EAST_1; + + final String protocol; + final HttpConfig.Policy webPolicy = getHttpPolicy(conf); + String host; + + if (webPolicy.isHttpsEnabled()) { + // TODO: Currently HTTPS is disabled in the test, we can add HTTPS + // integration in the future + protocol = HTTPS_SCHEME; + host = conf.get(OZONE_S3G_HTTPS_ADDRESS_KEY); + } else { + protocol = HTTP_SCHEME; + host = conf.get(OZONE_S3G_HTTP_ADDRESS_KEY); + } + + String endpoint = protocol + "://" + host; + + LOG.info("S3 Endpoint is {}", endpoint); + + AwsBasicCredentials credentials = AwsBasicCredentials.create(accessKey, secretKey); + + return S3Client.builder() + .region(region) + .endpointOverride(new URI(endpoint)) + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .forcePathStyle(enablePathStyle) + .build(); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3GatewayService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3GatewayService.java new file mode 100644 index 00000000000..02c3a365a4a --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/S3GatewayService.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.s3; + +import static org.apache.ozone.test.GenericTestUtils.PortAllocator.localhostWithFreePort; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.ratis.util.Preconditions; + +/** S3 Gateway for {@link MiniOzoneCluster}. */ +public class S3GatewayService implements MiniOzoneCluster.Service { + + private static final String[] NO_ARGS = new String[0]; + + private Gateway s3g; + + @Override + public void start(OzoneConfiguration conf) throws Exception { + Preconditions.assertNull(s3g, "S3 Gateway already started"); + configureS3G(new OzoneConfiguration(conf)); + s3g = new Gateway(); + s3g.execute(NO_ARGS); + } + + @Override + public void stop() throws Exception { + Preconditions.assertNotNull(s3g, "S3 Gateway not running"); + s3g.stop(); + // TODO (HDDS-11539): Remove this workaround once the @PreDestroy issue is fixed + OzoneClientCache.closeClient(); + } + + @Override + public String toString() { + final Gateway instance = s3g; + return instance != null + ? 
"S3Gateway(http=" + instance.getHttpAddress() + ", https=" + instance.getHttpsAddress() + ")" + : "S3Gateway"; + } + + public OzoneConfiguration getConf() { + return OzoneConfigurationHolder.configuration(); + } + + private void configureS3G(OzoneConfiguration conf) { + OzoneConfigurationHolder.resetConfiguration(); + + conf.set(S3GatewayConfigKeys.OZONE_S3G_HTTP_ADDRESS_KEY, localhostWithFreePort()); + conf.set(S3GatewayConfigKeys.OZONE_S3G_HTTPS_ADDRESS_KEY, localhostWithFreePort()); + + OzoneConfigurationHolder.setConfiguration(conf); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java index 9efaafd290d..a4eefb81dba 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java @@ -101,6 +101,8 @@ import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.s3.S3ClientFactory; +import org.apache.hadoop.ozone.s3.S3GatewayService; import org.apache.ozone.test.OzoneTestBase; import org.junit.jupiter.api.MethodOrderer; import org.junit.jupiter.api.Test; @@ -181,12 +183,13 @@ public abstract class AbstractS3SDKV1Tests extends OzoneTestBase { * @throws Exception exception thrown when waiting for the cluster to be ready. */ static void startCluster(OzoneConfiguration conf) throws Exception { + S3GatewayService s3g = new S3GatewayService(); cluster = MiniOzoneCluster.newBuilder(conf) - .includeS3G(true) + .addService(s3g) .setNumDatanodes(5) .build(); cluster.waitForClusterToBeReady(); - s3Client = cluster.newS3Client(); + s3Client = new S3ClientFactory(s3g.getConf()).createS3Client(); } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java index 53328f9e435..834580b6e8b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java @@ -39,6 +39,8 @@ import javax.xml.bind.DatatypeConverter; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.s3.S3ClientFactory; +import org.apache.hadoop.ozone.s3.S3GatewayService; import org.apache.ozone.test.OzoneTestBase; import org.junit.jupiter.api.MethodOrderer; import org.junit.jupiter.api.Test; @@ -87,12 +89,13 @@ public abstract class AbstractS3SDKV2Tests extends OzoneTestBase { * @throws Exception exception thrown when waiting for the cluster to be ready. 
*/ static void startCluster(OzoneConfiguration conf) throws Exception { + S3GatewayService s3g = new S3GatewayService(); cluster = MiniOzoneCluster.newBuilder(conf) - .includeS3G(true) + .addService(s3g) .setNumDatanodes(5) .build(); cluster.waitForClusterToBeReady(); - s3Client = cluster.newS3ClientV2(); + s3Client = new S3ClientFactory(s3g.getConf()).createS3ClientV2(); } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java index 7513e4fdfbb..54e9fdeff0a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java @@ -43,7 +43,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; -import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ContainerStateManager; @@ -158,8 +157,7 @@ private void updateContainerMetadata(long cid) throws Exception { getContainerManager().getContainerStateManager(); containerStateManager.addContainer(container.getProtobuf()); for (ContainerReplica replica: replicaSet) { - containerStateManager.updateContainerReplica( - ContainerID.valueOf(cid), replica); + containerStateManager.updateContainerReplica(replica); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 2d955e7cea6..de960d8c454 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -69,8 +69,6 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_READ_THREADPOOL_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_S3_GPRC_SERVER_ENABLED; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_S3_GRPC_SERVER_ENABLED_DEFAULT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_USER_MAX_VOLUME; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_USER_MAX_VOLUME_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_VOLUME_LISTALL_ALLOWED; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_VOLUME_LISTALL_ALLOWED_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SERVER_DEFAULT_REPLICATION_DEFAULT; @@ -438,9 +436,6 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private final ReplicationConfigValidator replicationConfigValidator; private boolean allowListAllVolumes; - // Adding parameters needed for VolumeRequests here, so that during request - // execution, we can get from ozoneManager. 
- private final long maxUserVolumeCount; private int minMultipartUploadPartSize = OzoneConsts.OM_MULTIPART_MIN_SIZE; @@ -549,10 +544,6 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) this.threadPrefix = omNodeDetails.threadNamePrefix(); loginOMUserIfSecurityEnabled(conf); setInstanceVariablesFromConf(); - this.maxUserVolumeCount = conf.getInt(OZONE_OM_USER_MAX_VOLUME, - OZONE_OM_USER_MAX_VOLUME_DEFAULT); - Preconditions.checkArgument(this.maxUserVolumeCount > 0, - OZONE_OM_USER_MAX_VOLUME + " value should be greater than zero"); if (omStorage.getState() != StorageState.INITIALIZED) { throw new OMException("OM not initialized, current OM storage state: " @@ -3040,7 +3031,7 @@ public String getNamespace() { @Override public String getRpcPort() { - return "" + omRpcAddress.getPort(); + return String.valueOf(omRpcAddress.getPort()); } private static List> getRatisRolesException(String exceptionString) { @@ -4206,7 +4197,7 @@ public String getComponent() { * @return maxUserVolumeCount */ public long getMaxUserVolumeCount() { - return maxUserVolumeCount; + return config.getMaxUserVolumeCount(); } /** * Return true, if the current OM node is leader and in ready state to diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/helpers/OMAuditLogger.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/helpers/OMAuditLogger.java index 44c5ce7d042..80c20f7af6d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/helpers/OMAuditLogger.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/helpers/OMAuditLogger.java @@ -120,7 +120,7 @@ public static void log(OMAuditLogger.Builder builder, TermIndex termIndex) { if (null == builder.getAuditMap()) { builder.setAuditMap(new HashMap<>()); } - builder.getAuditMap().put("Transaction", "" + termIndex.getIndex()); + builder.getAuditMap().put("Transaction", String.valueOf(termIndex.getIndex())); builder.getMessageBuilder().withParams(builder.getAuditMap()); builder.getAuditLogger().logWrite(builder.getMessageBuilder().build()); } @@ -150,7 +150,7 @@ public static void log(OMAuditLogger.Builder builder, OMClientRequest request, O } try { builder.getAuditMap().put("Command", request.getOmRequest().getCmdType().name()); - builder.getAuditMap().put("Transaction", "" + termIndex.getIndex()); + builder.getAuditMap().put("Transaction", String.valueOf(termIndex.getIndex())); request.buildAuditMessage(action, builder.getAuditMap(), th, request.getUserInfo()); builder.setLog(true); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java index 1a6c784af68..c7f17e16bb3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java @@ -515,9 +515,8 @@ private void terminate(Throwable t, int status) { } private void terminate(Throwable t, int status, OMResponse omResponse) { - StringBuilder message = new StringBuilder( - "During flush to DB encountered error in " + - "OMDoubleBuffer flush thread " + Thread.currentThread().getName()); + StringBuilder message = new StringBuilder("During flush to DB encountered error in OMDoubleBuffer flush thread ") + .append(Thread.currentThread().getName()); if (omResponse != null) { 
message.append(" when handling OMRequest: ").append(omResponse); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index e10ca3f9a5a..1583f3fef7f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -204,7 +204,8 @@ public BackgroundTaskResult call() throws Exception { if (LOG.isDebugEnabled()) { StringBuilder sb = new StringBuilder(); for (OpenKeyBucket.Builder openKey : openKeyBuckets) { - sb.append(openKey.getVolumeName() + OZONE_URI_DELIMITER + openKey.getBucketName() + ": ") + sb.append(openKey.getVolumeName()).append(OZONE_URI_DELIMITER).append(openKey.getBucketName()) + .append(": ") .append(openKey.getKeysList().stream().map(OzoneManagerProtocolProtos.OpenKey::getName) .collect(Collectors.toList())) .append("\n"); @@ -226,8 +227,8 @@ public BackgroundTaskResult call() throws Exception { if (LOG.isDebugEnabled()) { StringBuilder sb = new StringBuilder(); for (CommitKeyRequest.Builder openKey : hsyncKeys) { - sb.append(openKey.getKeyArgs().getVolumeName() + OZONE_URI_DELIMITER + - openKey.getKeyArgs().getBucketName() + ": ") + sb.append(openKey.getKeyArgs().getVolumeName()).append(OZONE_URI_DELIMITER) + .append(openKey.getKeyArgs().getBucketName()).append(": ") .append(openKey.getKeyArgs().getKeyName()) .append(", "); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis_snapshot/TestOmRatisSnapshotProvider.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis_snapshot/TestOmRatisSnapshotProvider.java index d74fdc402e0..dac7264d9d9 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis_snapshot/TestOmRatisSnapshotProvider.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis_snapshot/TestOmRatisSnapshotProvider.java @@ -81,7 +81,7 @@ public void setup(@TempDir File snapshotDir, false, connectionFactory); sb = new StringBuilder(); - sb.append("--" + MULTIPART_FORM_DATA_BOUNDARY + CR_NL); + sb.append("--").append(MULTIPART_FORM_DATA_BOUNDARY).append(CR_NL); sb.append(CONTENT_DISPOSITION); } @@ -105,7 +105,7 @@ public void testDownloadSnapshot() throws IOException, omRatisSnapshotProvider.downloadSnapshot(leaderNodeId, targetFile); - sb.append("--" + MULTIPART_FORM_DATA_BOUNDARY + "--" + CR_NL); + sb.append("--").append(MULTIPART_FORM_DATA_BOUNDARY).append("--").append(CR_NL); assertEquals(sb.toString(), new String(outputStream.toByteArray(), StandardCharsets.UTF_8)); } @@ -123,7 +123,7 @@ public void testWriteFormDataWithSstFile() throws IOException { OmRatisSnapshotProvider.writeFormData(connection, sstFiles); sb.append(fileName).append(CR_NL); - sb.append("--" + MULTIPART_FORM_DATA_BOUNDARY + "--" + CR_NL); + sb.append("--").append(MULTIPART_FORM_DATA_BOUNDARY).append("--").append(CR_NL); assertEquals(sb.toString(), new String(outputStream.toByteArray(), StandardCharsets.UTF_8)); } @@ -136,7 +136,7 @@ public void testWriteFormDataWithoutSstFile() throws IOException { OmRatisSnapshotProvider.writeFormData(connection, new ArrayList<>()); - sb.append("--" + MULTIPART_FORM_DATA_BOUNDARY + "--" + CR_NL); + sb.append("--").append(MULTIPART_FORM_DATA_BOUNDARY).append("--").append(CR_NL); assertEquals(sb.toString(), new 
String(outputStream.toByteArray(), StandardCharsets.UTF_8)); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java index 4bb2737ab1f..63f1052e10e 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java @@ -21,7 +21,6 @@ import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status.OK; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -703,9 +702,11 @@ public void testValidateAndUpdateCacheOnOverwrite() throws Exception { List> rangeKVs = omMetadataManager.getDeletedTable().getRangeKVs(null, 100, deletedKey); assertThat(rangeKVs.size()).isGreaterThan(0); - assertEquals(1, rangeKVs.get(0).getValue().getOmKeyInfoList().size()); - assertFalse(rangeKVs.get(0).getKey().endsWith(rangeKVs.get(0).getValue().getOmKeyInfoList().get(0).getObjectID() - + "")); + Table.KeyValue keyValue = rangeKVs.get(0); + String key = keyValue.getKey(); + List omKeyInfoList = keyValue.getValue().getOmKeyInfoList(); + assertEquals(1, omKeyInfoList.size()); + assertThat(key).doesNotEndWith(String.valueOf(omKeyInfoList.get(0).getObjectID())); } /** diff --git a/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java b/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java index 6107c1eb3ad..0fffeb9edef 100644 --- a/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java +++ b/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java @@ -29,6 +29,8 @@ import org.jooq.DSLContext; import org.jooq.impl.DSL; import org.jooq.impl.SQLDataType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class used to create tables that are required for tracking containers. 
@@ -38,6 +40,8 @@ public class ContainerSchemaDefinition implements ReconSchemaDefinition { public static final String UNHEALTHY_CONTAINERS_TABLE_NAME = "UNHEALTHY_CONTAINERS"; + private static final Logger LOG = + LoggerFactory.getLogger(ContainerSchemaDefinition.class); /** * ENUM describing the allowed container states which can be stored in the @@ -68,6 +72,7 @@ public void initializeSchema() throws SQLException { Connection conn = dataSource.getConnection(); dslContext = DSL.using(conn); if (!TABLE_EXISTS_CHECK.test(conn, UNHEALTHY_CONTAINERS_TABLE_NAME)) { + LOG.info("UNHEALTHY_CONTAINERS table is missing, creating a new one."); createUnhealthyContainersTable(); } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java index ecd88f80995..6927d77c033 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java @@ -95,11 +95,14 @@ private ReconConstants() { // For file-size count reprocessing: ensure only one task truncates the table. public static final AtomicBoolean FILE_SIZE_COUNT_TABLE_TRUNCATED = new AtomicBoolean(false); + public static final AtomicBoolean CONTAINER_KEY_TABLES_TRUNCATED = new AtomicBoolean(false); + /** - * Resets the table-truncated flag for the given tables. This should be called once per reprocess cycle, + * Resets the table truncated flag for the given tables. This should be called once per reprocess cycle, * for example by the OM task controller, before the tasks run. */ public static void resetTableTruncatedFlags() { FILE_SIZE_COUNT_TABLE_TRUNCATED.set(false); + CONTAINER_KEY_TABLES_TRUNCATED.set(false); } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconControllerModule.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconControllerModule.java index 5bbda035621..ad2d25a29d4 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconControllerModule.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconControllerModule.java @@ -58,7 +58,8 @@ import org.apache.hadoop.ozone.recon.spi.impl.ReconDBProvider; import org.apache.hadoop.ozone.recon.spi.impl.ReconNamespaceSummaryManagerImpl; import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; -import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTask; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTaskFSO; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTaskOBS; import org.apache.hadoop.ozone.recon.tasks.FileSizeCountTaskFSO; import org.apache.hadoop.ozone.recon.tasks.FileSizeCountTaskOBS; import org.apache.hadoop.ozone.recon.tasks.NSSummaryTask; @@ -131,7 +132,8 @@ static class ReconOmTaskBindingModule extends AbstractModule { protected void configure() { Multibinder taskBinder = Multibinder.newSetBinder(binder(), ReconOmTask.class); - taskBinder.addBinding().to(ContainerKeyMapperTask.class); + taskBinder.addBinding().to(ContainerKeyMapperTaskFSO.class); + taskBinder.addBinding().to(ContainerKeyMapperTaskOBS.class); taskBinder.addBinding().to(FileSizeCountTaskFSO.class); taskBinder.addBinding().to(FileSizeCountTaskOBS.class); taskBinder.addBinding().to(OmTableInsightTask.class); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java index c3fd5bb3592..ce4baa60479 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java @@ -106,7 +106,7 @@ public final class ReconServerConfigKeys { public static final String OZONE_RECON_TASK_THREAD_COUNT_KEY = "ozone.recon.task.thread.count"; - public static final int OZONE_RECON_TASK_THREAD_COUNT_DEFAULT = 5; + public static final int OZONE_RECON_TASK_THREAD_COUNT_DEFAULT = 8; public static final String OZONE_RECON_HTTP_AUTH_CONFIG_PREFIX = "ozone.recon.http.auth."; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java index d9d1fd5b4c4..fbbb58a124e 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java @@ -222,9 +222,9 @@ public void onMessage(CommandForDatanode commandForDatanode, * @return SCMheartbeat response. */ @Override - public List processHeartbeat(DatanodeDetails datanodeDetails, + public List> processHeartbeat(DatanodeDetails datanodeDetails, CommandQueueReportProto queueReport) { - List cmds = new ArrayList<>(); + List> cmds = new ArrayList<>(); long currentTime = Time.now(); if (needUpdate(datanodeDetails, currentTime)) { cmds.add(new ReregisterCommand()); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java similarity index 50% rename from hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTask.java rename to hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java index e42e021b9e4..7e5a02ff99a 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java @@ -17,16 +17,10 @@ package org.apache.hadoop.ozone.recon.tasks; -import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.FILE_TABLE; -import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.KEY_TABLE; - -import com.google.inject.Inject; import java.io.IOException; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -34,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.db.RDBBatchOperation; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; @@ -43,7 +36,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; -import org.apache.hadoop.ozone.recon.ReconServerConfigKeys; +import org.apache.hadoop.ozone.recon.ReconConstants; import org.apache.hadoop.ozone.recon.api.types.ContainerKeyPrefix; import org.apache.hadoop.ozone.recon.api.types.KeyPrefixContainer; import 
org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; @@ -51,147 +44,111 @@ import org.slf4j.LoggerFactory; /** - * Class to iterate over the OM DB and populate the Recon container DB with - * the container -> Key reverse mapping. + * Helper class that encapsulates the common logic for ContainerKeyMapperTaskFSO and ContainerKeyMapperTaskOBS. */ -public class ContainerKeyMapperTask implements ReconOmTask { - - private static final Logger LOG = - LoggerFactory.getLogger(ContainerKeyMapperTask.class); - - private ReconContainerMetadataManager reconContainerMetadataManager; - private final long containerKeyFlushToDBMaxThreshold; - - @Inject - public ContainerKeyMapperTask(ReconContainerMetadataManager - reconContainerMetadataManager, - OzoneConfiguration configuration) { - this.reconContainerMetadataManager = reconContainerMetadataManager; - this.containerKeyFlushToDBMaxThreshold = configuration.getLong( - ReconServerConfigKeys. - OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD, - ReconServerConfigKeys. - OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT - ); - } +public abstract class ContainerKeyMapperHelper { + + private static final Logger LOG = LoggerFactory.getLogger(ContainerKeyMapperHelper.class); + + // Static lock to guard table truncation. + private static final Object TRUNCATE_LOCK = new Object(); /** - * Read Key -> ContainerId data from OM snapshot DB and write reverse map - * (container, key) -> count to Recon Container DB. + * Ensures that the container key tables are truncated only once before reprocessing. + * Uses an AtomicBoolean to track if truncation has already been performed. + * + * @param reconContainerMetadataManager The metadata manager instance responsible for DB operations. */ - @Override - public TaskResult reprocess(OMMetadataManager omMetadataManager) { - long omKeyCount = 0; + public static void truncateTablesIfNeeded(ReconContainerMetadataManager reconContainerMetadataManager, + String taskName) { + synchronized (TRUNCATE_LOCK) { + if (ReconConstants.CONTAINER_KEY_TABLES_TRUNCATED.compareAndSet(false, true)) { + try { + // Perform table truncation + reconContainerMetadataManager.reinitWithNewContainerDataFromOm(Collections.emptyMap()); + LOG.debug("Successfully truncated container key tables."); + } catch (Exception e) { + // Reset the flag so truncation can be retried + ReconConstants.CONTAINER_KEY_TABLES_TRUNCATED.set(false); + LOG.error("Error while truncating container key tables for task {}. 
Resetting flag.", taskName, e); + throw new RuntimeException("Table truncation failed", e); + } + } else { + LOG.debug("Container key tables already truncated by another task."); + } + } + } - // In-memory maps for fast look up and batch write - // (container, key) -> count + public static boolean reprocess(OMMetadataManager omMetadataManager, + ReconContainerMetadataManager reconContainerMetadataManager, + BucketLayout bucketLayout, + String taskName, + long containerKeyFlushToDBMaxThreshold) { + long omKeyCount = 0; Map containerKeyMap = new HashMap<>(); - // containerId -> key count Map containerKeyCountMap = new HashMap<>(); + try { - LOG.debug("Starting a 'reprocess' run of ContainerKeyMapperTask."); + LOG.debug("Starting a 'reprocess' run for {}.", taskName); Instant start = Instant.now(); - // initialize new container DB - reconContainerMetadataManager - .reinitWithNewContainerDataFromOm(new HashMap<>()); - - // loop over both key table and file table - for (BucketLayout layout : Arrays.asList(BucketLayout.LEGACY, - BucketLayout.FILE_SYSTEM_OPTIMIZED)) { - // (HDDS-8580) Since "reprocess" iterate over the whole key table, - // containerKeyMap needs to be incrementally flushed to DB based on - // configured batch threshold. - // containerKeyCountMap can be flushed at the end since the number - // of containers in a cluster will not have significant memory overhead. - Table omKeyInfoTable = - omMetadataManager.getKeyTable(layout); - try ( - TableIterator> - keyIter = omKeyInfoTable.iterator()) { - while (keyIter.hasNext()) { - Table.KeyValue kv = keyIter.next(); - OmKeyInfo omKeyInfo = kv.getValue(); - handleKeyReprocess(kv.getKey(), omKeyInfo, containerKeyMap, - containerKeyCountMap); - if (!checkAndCallFlushToDB(containerKeyMap)) { - LOG.error("Unable to flush containerKey information to the DB"); - return buildTaskResult(false); - } - omKeyCount++; + // Ensure the tables are truncated only once + truncateTablesIfNeeded(reconContainerMetadataManager, taskName); + + // Get the appropriate table based on BucketLayout + Table omKeyInfoTable = omMetadataManager.getKeyTable(bucketLayout); + + // Iterate through the table and process keys + try (TableIterator> keyIter = omKeyInfoTable.iterator()) { + while (keyIter.hasNext()) { + Table.KeyValue kv = keyIter.next(); + handleKeyReprocess(kv.getKey(), kv.getValue(), containerKeyMap, containerKeyCountMap, + reconContainerMetadataManager); + omKeyCount++; + + // Check and flush data if it reaches the batch threshold + if (!checkAndCallFlushToDB(containerKeyMap, containerKeyFlushToDBMaxThreshold, + reconContainerMetadataManager)) { + LOG.error("Failed to flush container key data for {}", taskName); + return false; } } } - // flush and commit left out keys at end, - // also batch write containerKeyCountMap to the containerKeyCountTable - if (!flushAndCommitContainerKeyInfoToDB(containerKeyMap, - containerKeyCountMap)) { - LOG.error("Unable to flush Container Key Count and " + - "remaining Container Key information to the DB"); - return buildTaskResult(false); + // Final flush and commit + if (!flushAndCommitContainerKeyInfoToDB(containerKeyMap, containerKeyCountMap, reconContainerMetadataManager)) { + LOG.error("Failed to flush Container Key data to DB for {}", taskName); + return false; } - LOG.debug("Completed 'reprocess' of ContainerKeyMapperTask."); Instant end = Instant.now(); - long duration = Duration.between(start, end).toMillis(); - LOG.debug("It took me {} seconds to process {} keys.", - (double) duration / 1000.0, omKeyCount); - } 
catch (IOException ioEx) { - LOG.error("Unable to populate Container Key data in Recon DB. ", - ioEx); - return buildTaskResult(false); - } - return buildTaskResult(true); - } + long durationMillis = Duration.between(start, end).toMillis(); + double durationSeconds = (double) durationMillis / 1000.0; + LOG.debug("Completed 'reprocess' for {}. Processed {} keys in {} ms ({} seconds).", + taskName, omKeyCount, durationMillis, durationSeconds); - private boolean flushAndCommitContainerKeyInfoToDB( - Map containerKeyMap, - Map containerKeyCountMap) { - try { - // deleted container list is not needed since "reprocess" only has - // put operations - writeToTheDB(containerKeyMap, containerKeyCountMap, - Collections.emptyList()); - containerKeyMap.clear(); - containerKeyCountMap.clear(); - } catch (IOException e) { - LOG.error("Unable to write Container Key and " + - "Container Key Count data in Recon DB.", e); + } catch (IOException ioEx) { + LOG.error("Error populating Container Key data for {} in Recon DB.", taskName, ioEx); return false; } return true; } - private boolean checkAndCallFlushToDB( - Map containerKeyMap) { - // if containerKeyMap more than entries, flush to DB and clear the map - if (null != containerKeyMap && containerKeyMap.size() >= - containerKeyFlushToDBMaxThreshold) { - return flushAndCommitContainerKeyInfoToDB(containerKeyMap, - Collections.emptyMap()); + private static boolean checkAndCallFlushToDB(Map containerKeyMap, + long containerKeyFlushToDBMaxThreshold, + ReconContainerMetadataManager reconContainerMetadataManager) { + if (containerKeyMap.size() >= containerKeyFlushToDBMaxThreshold) { + return flushAndCommitContainerKeyInfoToDB(containerKeyMap, Collections.emptyMap(), reconContainerMetadataManager); } return true; } - @Override - public String getTaskName() { - return "ContainerKeyMapperTask"; - } - - public Collection getTaskTables() { - List taskTables = new ArrayList<>(); - taskTables.add(KEY_TABLE); - taskTables.add(FILE_TABLE); - return taskTables; - } - - @Override - public TaskResult process(OMUpdateEventBatch events, - Map subTaskSeekPosMap) { + public static boolean process(OMUpdateEventBatch events, + String tableName, + ReconContainerMetadataManager reconContainerMetadataManager, + String taskName) { Iterator eventIterator = events.getIterator(); int eventCount = 0; - final Collection taskTables = getTaskTables(); // In-memory maps for fast look up and batch write // (HDDS-8580) containerKeyMap map is allowed to be used @@ -205,10 +162,11 @@ public TaskResult process(OMUpdateEventBatch events, // List of the deleted (container, key) pair's List deletedKeyCountList = new ArrayList<>(); long startTime = System.currentTimeMillis(); + while (eventIterator.hasNext()) { OMDBUpdateEvent omdbUpdateEvent = eventIterator.next(); // Filter event inside process method to avoid duping - if (!taskTables.contains(omdbUpdateEvent.getTable())) { + if (!tableName.equals(omdbUpdateEvent.getTable())) { continue; } String updatedKey = omdbUpdateEvent.getKey(); @@ -217,92 +175,108 @@ public TaskResult process(OMUpdateEventBatch events, switch (omdbUpdateEvent.getAction()) { case PUT: handlePutOMKeyEvent(updatedKey, updatedKeyValue, containerKeyMap, - containerKeyCountMap, deletedKeyCountList); + containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager); break; case DELETE: handleDeleteOMKeyEvent(updatedKey, containerKeyMap, - containerKeyCountMap, deletedKeyCountList); + containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager); break; case 
UPDATE: if (omdbUpdateEvent.getOldValue() != null) { handleDeleteOMKeyEvent( omdbUpdateEvent.getOldValue().getKeyName(), containerKeyMap, - containerKeyCountMap, deletedKeyCountList); + containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager); } else { - LOG.warn("Update event does not have the old Key Info for {}.", - updatedKey); + LOG.warn("Update event does not have the old Key Info for {}.", updatedKey); } handlePutOMKeyEvent(updatedKey, updatedKeyValue, containerKeyMap, - containerKeyCountMap, deletedKeyCountList); + containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager); break; - default: LOG.debug("Skipping DB update event : {}", - omdbUpdateEvent.getAction()); + default: + LOG.info("Skipping DB update event: {}", omdbUpdateEvent.getAction()); } eventCount++; } catch (IOException e) { - LOG.error("Unexpected exception while updating key data : {} ", - updatedKey, e); - return buildTaskResult(false); + LOG.error("Unexpected exception while updating key data: {} ", updatedKey, e); + return false; } } try { - writeToTheDB(containerKeyMap, containerKeyCountMap, deletedKeyCountList); + writeToTheDB(containerKeyMap, containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager); } catch (IOException e) { LOG.error("Unable to write Container Key Prefix data in Recon DB.", e); - return buildTaskResult(false); + return false; } LOG.debug("{} successfully processed {} OM DB update event(s) in {} milliseconds.", - getTaskName(), eventCount, (System.currentTimeMillis() - startTime)); - return buildTaskResult(true); + taskName, eventCount, (System.currentTimeMillis() - startTime)); + return true; } - private void writeToTheDB(Map containerKeyMap, - Map containerKeyCountMap, - List deletedContainerKeyList) + /** + * Note to add an OM key and update containerID -> no. of keys count. + * + * @param key key String + * @param omKeyInfo omKeyInfo value + * @param containerKeyMap we keep the added containerKeys in this map + * (in this batch) + * @param containerKeyCountMap we keep the containerKey counts in this map + * @param deletedContainerKeyList list of the deleted containerKeys + * @throws IOException if unable to write to recon DB. + */ + private static void handlePutOMKeyEvent(String key, OmKeyInfo omKeyInfo, + Map containerKeyMap, + Map containerKeyCountMap, + List deletedContainerKeyList, + ReconContainerMetadataManager reconContainerMetadataManager) throws IOException { - try (RDBBatchOperation rdbBatchOperation = new RDBBatchOperation()) { - containerKeyMap.keySet().forEach((ContainerKeyPrefix key) -> { - try { - reconContainerMetadataManager - .batchStoreContainerKeyMapping(rdbBatchOperation, key, - containerKeyMap.get(key)); - } catch (IOException e) { - LOG.error("Unable to write Container Key Prefix data in Recon DB.", - e); - } - }); + long containerCountToIncrement = 0; + for (OmKeyLocationInfoGroup omKeyLocationInfoGroup : omKeyInfo.getKeyLocationVersions()) { + long keyVersion = omKeyLocationInfoGroup.getVersion(); + for (OmKeyLocationInfo omKeyLocationInfo : omKeyLocationInfoGroup.getLocationList()) { + long containerId = omKeyLocationInfo.getContainerID(); + ContainerKeyPrefix containerKeyPrefix = ContainerKeyPrefix.get(containerId, key, keyVersion); + if (reconContainerMetadataManager.getCountForContainerKeyPrefix(containerKeyPrefix) == 0 && + !containerKeyMap.containsKey(containerKeyPrefix)) { + // Save on writes. No need to save same container-key prefix + // mapping again. 
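+ // The value stored for a container-key prefix is always 1; per-container
+ // key totals are accumulated separately in containerKeyCountMap below.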
+ containerKeyMap.put(containerKeyPrefix, 1); + // Remove the container-key prefix from the deleted list if we + // previously deleted it in this batch (and now we add it again) + deletedContainerKeyList.remove(containerKeyPrefix); + // check if container already exists and + // increment the count of containers if it does not exist + if (!reconContainerMetadataManager.doesContainerExists(containerId) && + !containerKeyCountMap.containsKey(containerId)) { + containerCountToIncrement++; + } - containerKeyCountMap.keySet().forEach((Long key) -> { - try { - reconContainerMetadataManager - .batchStoreContainerKeyCounts(rdbBatchOperation, key, - containerKeyCountMap.get(key)); - } catch (IOException e) { - LOG.error("Unable to write Container Key Prefix data in Recon DB.", - e); - } - }); + // update the count of keys for the given containerID + long keyCount; + if (containerKeyCountMap.containsKey(containerId)) { + keyCount = containerKeyCountMap.get(containerId); + } else { + keyCount = reconContainerMetadataManager.getKeyCountForContainer(containerId); + } - deletedContainerKeyList.forEach((ContainerKeyPrefix key) -> { - try { - reconContainerMetadataManager - .batchDeleteContainerMapping(rdbBatchOperation, key); - } catch (IOException e) { - LOG.error("Unable to write Container Key Prefix data in Recon DB.", - e); + // increment the count and update containerKeyCount. + // keyCount will be 0 if containerID is not found. So, there is no + // need to initialize keyCount for the first time. + containerKeyCountMap.put(containerId, ++keyCount); } - }); + } + } - reconContainerMetadataManager.commitBatchOperation(rdbBatchOperation); + if (containerCountToIncrement > 0) { + reconContainerMetadataManager.incrementContainerCountBy(containerCountToIncrement); } } /** - * Note to delete an OM Key and update the containerID -> no. of keys counts + * Note to delete an OM Key and update the containerID -> no. of keys counts * (we are preparing for batch deletion in these data structures). * * @param key key String. @@ -312,24 +286,21 @@ private void writeToTheDB(Map containerKeyMap, * @param deletedContainerKeyList list of the deleted containerKeys * @throws IOException If Unable to write to container DB. 
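+ * @param reconContainerMetadataManager Recon metadata manager instance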
*/ - private void handleDeleteOMKeyEvent(String key, - Map - containerKeyMap, - Map containerKeyCountMap, - List - deletedContainerKeyList) + private static void handleDeleteOMKeyEvent(String key, + Map containerKeyMap, + Map containerKeyCountMap, + List deletedContainerKeyList, + ReconContainerMetadataManager reconContainerMetadataManager) throws IOException { Set keysToBeDeleted = new HashSet<>(); - try (TableIterator> keyContainerIterator = - reconContainerMetadataManager.getKeyContainerTableIterator()) { + try (TableIterator> + keyContainerIterator = reconContainerMetadataManager.getKeyContainerTableIterator()) { // Check if we have keys in this container in the DB keyContainerIterator.seek(KeyPrefixContainer.get(key)); while (keyContainerIterator.hasNext()) { - Table.KeyValue keyValue = - keyContainerIterator.next(); + Table.KeyValue keyValue = keyContainerIterator.next(); String keyPrefix = keyValue.getKey().getKeyPrefix(); if (keyPrefix.equals(key)) { if (keyValue.getKey().getContainerId() != -1) { @@ -342,13 +313,12 @@ private void handleDeleteOMKeyEvent(String key, } // Check if we have keys in this container in our containerKeyMap - containerKeyMap.keySet() - .forEach((ContainerKeyPrefix containerKeyPrefix) -> { - String keyPrefix = containerKeyPrefix.getKeyPrefix(); - if (keyPrefix.equals(key)) { - keysToBeDeleted.add(containerKeyPrefix); - } - }); + containerKeyMap.keySet().forEach((ContainerKeyPrefix containerKeyPrefix) -> { + String keyPrefix = containerKeyPrefix.getKeyPrefix(); + if (keyPrefix.equals(key)) { + keysToBeDeleted.add(containerKeyPrefix); + } + }); for (ContainerKeyPrefix containerKeyPrefix : keysToBeDeleted) { deletedContainerKeyList.add(containerKeyPrefix); @@ -356,14 +326,13 @@ private void handleDeleteOMKeyEvent(String key, // it in this batch (and now we delete it) containerKeyMap.remove(containerKeyPrefix); - // decrement count and update containerKeyCount. + // Decrement count and update containerKeyCount. Long containerID = containerKeyPrefix.getContainerId(); long keyCount; if (containerKeyCountMap.containsKey(containerID)) { keyCount = containerKeyCountMap.get(containerID); } else { - keyCount = reconContainerMetadataManager - .getKeyCountForContainer(containerID); + keyCount = reconContainerMetadataManager.getKeyCountForContainer(containerID); } if (keyCount > 0) { containerKeyCountMap.put(containerID, --keyCount); @@ -371,130 +340,121 @@ private void handleDeleteOMKeyEvent(String key, } } - /** - * Note to add an OM key and update containerID -> no. of keys count. - * - * @param key key String - * @param omKeyInfo omKeyInfo value - * @param containerKeyMap we keep the added containerKeys in this map - * (in this batch) - * @param containerKeyCountMap we keep the containerKey counts in this map - * @param deletedContainerKeyList list of the deleted containerKeys - * @throws IOException if unable to write to recon DB. 
- */ - private void handlePutOMKeyEvent(String key, OmKeyInfo omKeyInfo, - Map - containerKeyMap, + private static void writeToTheDB(Map containerKeyMap, Map containerKeyCountMap, - List - deletedContainerKeyList) + List deletedContainerKeyList, + ReconContainerMetadataManager reconContainerMetadataManager) throws IOException { - long containerCountToIncrement = 0; - for (OmKeyLocationInfoGroup omKeyLocationInfoGroup : omKeyInfo - .getKeyLocationVersions()) { - long keyVersion = omKeyLocationInfoGroup.getVersion(); - for (OmKeyLocationInfo omKeyLocationInfo : omKeyLocationInfoGroup - .getLocationList()) { - long containerId = omKeyLocationInfo.getContainerID(); - ContainerKeyPrefix containerKeyPrefix = ContainerKeyPrefix.get( - containerId, key, keyVersion); - if (reconContainerMetadataManager.getCountForContainerKeyPrefix( - containerKeyPrefix) == 0 - && !containerKeyMap.containsKey(containerKeyPrefix)) { - // Save on writes. No need to save same container-key prefix - // mapping again. - containerKeyMap.put(containerKeyPrefix, 1); - // Remove the container-key prefix from the deleted list if we - // previously deleted it in this batch (and now we add it again) - deletedContainerKeyList.remove(containerKeyPrefix); + try (RDBBatchOperation rdbBatchOperation = new RDBBatchOperation()) { - // check if container already exists and - // increment the count of containers if it does not exist - if (!reconContainerMetadataManager.doesContainerExists(containerId) - && !containerKeyCountMap.containsKey(containerId)) { - containerCountToIncrement++; - } + // Write container key mappings + containerKeyMap.keySet().forEach((ContainerKeyPrefix key) -> { + try { + reconContainerMetadataManager.batchStoreContainerKeyMapping( + rdbBatchOperation, key, containerKeyMap.get(key)); + } catch (IOException e) { + LOG.error("Unable to write Container Key Prefix data in Recon DB.", e); + } + }); - // update the count of keys for the given containerID - long keyCount; - if (containerKeyCountMap.containsKey(containerId)) { - keyCount = containerKeyCountMap.get(containerId); - } else { - keyCount = reconContainerMetadataManager - .getKeyCountForContainer(containerId); - } + // Write container key count mappings + containerKeyCountMap.keySet().forEach((Long key) -> { + try { + reconContainerMetadataManager.batchStoreContainerKeyCounts( + rdbBatchOperation, key, containerKeyCountMap.get(key)); + } catch (IOException e) { + LOG.error("Unable to write Container Key Count data in Recon DB.", e); + } + }); - // increment the count and update containerKeyCount. - // keyCount will be 0 if containerID is not found. So, there is no - // need to initialize keyCount for the first time. - containerKeyCountMap.put(containerId, ++keyCount); + // Delete container key mappings + deletedContainerKeyList.forEach((ContainerKeyPrefix key) -> { + try { + reconContainerMetadataManager.batchDeleteContainerMapping( + rdbBatchOperation, key); + } catch (IOException e) { + LOG.error("Unable to delete Container Key Prefix data in Recon DB.", e); } - } - } + }); - if (containerCountToIncrement > 0) { - reconContainerMetadataManager - .incrementContainerCountBy(containerCountToIncrement); + // Commit batch operation + reconContainerMetadataManager.commitBatchOperation(rdbBatchOperation); } } /** - * Write an OM key to container DB and update containerID -> no. of keys + * Write an OM key to container DB and update containerID -> no. of keys * count to the Global Stats table. 
* * @param key key String * @param omKeyInfo omKeyInfo value * @param containerKeyMap we keep the added containerKeys in this map * to allow incremental batching to containerKeyTable - * @param containerKeyCountMap we keep the containerKey counts in this map + * @param containerKeyCountMap we keep the containerKey counts in this map * to allow batching to containerKeyCountTable * after reprocessing is done + * @param reconContainerMetadataManager Recon metadata manager instance * @throws IOException if unable to write to recon DB. */ - private void handleKeyReprocess(String key, - OmKeyInfo omKeyInfo, - Map - containerKeyMap, - Map containerKeyCountMap) + public static void handleKeyReprocess(String key, + OmKeyInfo omKeyInfo, + Map containerKeyMap, + Map containerKeyCountMap, + ReconContainerMetadataManager reconContainerMetadataManager) throws IOException { + long containerCountToIncrement = 0; - for (OmKeyLocationInfoGroup omKeyLocationInfoGroup : omKeyInfo - .getKeyLocationVersions()) { + + for (OmKeyLocationInfoGroup omKeyLocationInfoGroup : omKeyInfo.getKeyLocationVersions()) { long keyVersion = omKeyLocationInfoGroup.getVersion(); - for (OmKeyLocationInfo omKeyLocationInfo : omKeyLocationInfoGroup - .getLocationList()) { + for (OmKeyLocationInfo omKeyLocationInfo : omKeyLocationInfoGroup.getLocationList()) { long containerId = omKeyLocationInfo.getContainerID(); - ContainerKeyPrefix containerKeyPrefix = ContainerKeyPrefix.get( - containerId, key, keyVersion); - if (reconContainerMetadataManager.getCountForContainerKeyPrefix( - containerKeyPrefix) == 0 + ContainerKeyPrefix containerKeyPrefix = ContainerKeyPrefix.get(containerId, key, keyVersion); + + if (reconContainerMetadataManager.getCountForContainerKeyPrefix(containerKeyPrefix) == 0 && !containerKeyMap.containsKey(containerKeyPrefix)) { - // Save on writes. No need to save same container-key prefix - // mapping again. + // Save on writes. No need to save same container-key prefix mapping again. containerKeyMap.put(containerKeyPrefix, 1); - // check if container already exists and - // if it exists, update the count of keys for the given containerID - // else, increment the count of containers and initialize keyCount - long keyCount; - if (containerKeyCountMap.containsKey(containerId)) { - keyCount = containerKeyCountMap.get(containerId); - } else { + // Check if container already exists; if not, increment the count + if (!reconContainerMetadataManager.doesContainerExists(containerId) + && !containerKeyCountMap.containsKey(containerId)) { containerCountToIncrement++; - keyCount = 0; } - // increment the count and update containerKeyCount. 
- containerKeyCountMap.put(containerId, ++keyCount); + // Update the count of keys for the given containerID + long keyCount = containerKeyCountMap.getOrDefault(containerId, + reconContainerMetadataManager.getKeyCountForContainer(containerId)); + + containerKeyCountMap.put(containerId, keyCount + 1); } } } if (containerCountToIncrement > 0) { - reconContainerMetadataManager - .incrementContainerCountBy(containerCountToIncrement); + reconContainerMetadataManager.incrementContainerCountBy(containerCountToIncrement); } } + public static boolean flushAndCommitContainerKeyInfoToDB( + Map containerKeyMap, + Map containerKeyCountMap, + ReconContainerMetadataManager reconContainerMetadataManager) { + + try { + // No deleted container list needed since "reprocess" only has put operations + writeToTheDB(containerKeyMap, containerKeyCountMap, Collections.emptyList(), reconContainerMetadataManager); + + // Clear in-memory maps after successful commit + containerKeyMap.clear(); + containerKeyCountMap.clear(); + + } catch (IOException e) { + LOG.error("Unable to write Container Key and Container Key Count data in Recon DB.", e); + return false; + } + return true; + } + } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskFSO.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskFSO.java new file mode 100644 index 00000000000..fd8df1a1e4f --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskFSO.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.tasks; + +import com.google.inject.Inject; +import java.util.Map; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.recon.ReconServerConfigKeys; +import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; + +/** + * Task for processing ContainerKey mapping specifically for FSO buckets. 
+ */ +public class ContainerKeyMapperTaskFSO implements ReconOmTask { + + private final ReconContainerMetadataManager reconContainerMetadataManager; + private final OzoneConfiguration ozoneConfiguration; + + @Inject + public ContainerKeyMapperTaskFSO(ReconContainerMetadataManager reconContainerMetadataManager, + OzoneConfiguration configuration) { + this.reconContainerMetadataManager = reconContainerMetadataManager; + this.ozoneConfiguration = configuration; + } + + @Override + public TaskResult reprocess(OMMetadataManager omMetadataManager) { + long containerKeyFlushToDBMaxThreshold = ozoneConfiguration.getLong( + ReconServerConfigKeys.OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD, + ReconServerConfigKeys.OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT); + boolean result = ContainerKeyMapperHelper.reprocess( + omMetadataManager, reconContainerMetadataManager, + BucketLayout.FILE_SYSTEM_OPTIMIZED, getTaskName(), containerKeyFlushToDBMaxThreshold); + return buildTaskResult(result); + } + + @Override + public String getTaskName() { + return "ContainerKeyMapperTaskFSO"; + } + + @Override + public TaskResult process(OMUpdateEventBatch events, Map subTaskSeekPosMap) { + boolean result = + ContainerKeyMapperHelper.process(events, "fileTable", reconContainerMetadataManager, getTaskName()); + return buildTaskResult(result); + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskOBS.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskOBS.java new file mode 100644 index 00000000000..178ee8b0286 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperTaskOBS.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.tasks; + +import com.google.inject.Inject; +import java.util.Map; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.recon.ReconServerConfigKeys; +import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; + +/** + * Task for processing ContainerKey mapping specifically for OBS buckets. 
+ */ +public class ContainerKeyMapperTaskOBS implements ReconOmTask { + + private final ReconContainerMetadataManager reconContainerMetadataManager; + private final OzoneConfiguration ozoneConfiguration; + + @Inject + public ContainerKeyMapperTaskOBS(ReconContainerMetadataManager reconContainerMetadataManager, + OzoneConfiguration configuration) { + this.reconContainerMetadataManager = reconContainerMetadataManager; + this.ozoneConfiguration = configuration; + } + + @Override + public TaskResult reprocess(OMMetadataManager omMetadataManager) { + long containerKeyFlushToDBMaxThreshold = ozoneConfiguration.getLong( + ReconServerConfigKeys.OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD, + ReconServerConfigKeys.OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT); + boolean result = ContainerKeyMapperHelper.reprocess( + omMetadataManager, reconContainerMetadataManager, BucketLayout.OBJECT_STORE, getTaskName(), + containerKeyFlushToDBMaxThreshold); + return buildTaskResult(result); + } + + @Override + public String getTaskName() { + return "ContainerKeyMapperTaskOBS"; + } + + @Override + public TaskResult process(OMUpdateEventBatch events, Map subTaskSeekPosMap) { + boolean result = ContainerKeyMapperHelper.process(events, "keyTable", reconContainerMetadataManager, getTaskName()); + return buildTaskResult(result); + } +} diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index 9353f406a77..65967c6e24f 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -99,7 +99,8 @@ import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; -import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTask; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTaskFSO; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTaskOBS; import org.apache.hadoop.ozone.recon.tasks.NSSummaryTaskWithFSO; import org.apache.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates; import org.apache.ozone.recon.schema.generated.tables.pojos.UnhealthyContainers; @@ -297,10 +298,13 @@ public void setUp() throws Exception { } private void reprocessContainerKeyMapper() { - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, - omConfiguration); - containerKeyMapperTask.reprocess(reconOMMetadataManager); + ContainerKeyMapperTaskOBS containerKeyMapperTaskOBS = + new ContainerKeyMapperTaskOBS(reconContainerMetadataManager, omConfiguration); + containerKeyMapperTaskOBS.reprocess(reconOMMetadataManager); + + ContainerKeyMapperTaskFSO containerKeyMapperTaskFSO = + new ContainerKeyMapperTaskFSO(reconContainerMetadataManager, omConfiguration); + containerKeyMapperTaskFSO.reprocess(reconOMMetadataManager); } private void setUpFSOData() throws IOException { @@ -1144,6 +1148,11 @@ protected ContainerWithPipeline getTestContainer( return new ContainerWithPipeline(containerInfo, localPipeline); } + void assertContainerCount(HddsProtos.LifeCycleState state, int expected) { + final int computed = containerStateManager.getContainerCount(state); + assertEquals(expected, 
computed); + } + @Test public void testGetSCMDeletedContainers() throws Exception { reconContainerManager.addNewContainer( @@ -1161,9 +1170,7 @@ public void testGetSCMDeletedContainers() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(102L), HddsProtos.LifeCycleEvent.CLEANUP); - Set containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 1); reconContainerManager.updateContainerState(ContainerID.valueOf(103L), HddsProtos.LifeCycleEvent.FINALIZE); @@ -1172,14 +1179,10 @@ public void testGetSCMDeletedContainers() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(103L), HddsProtos.LifeCycleEvent.DELETE); - containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETING); reconContainerManager .updateContainerState(ContainerID.valueOf(103L), HddsProtos.LifeCycleEvent.CLEANUP); - containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(2, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 2); Response scmDeletedContainers = containerEndpoint.getSCMDeletedContainers(2, 0); @@ -1212,9 +1215,7 @@ public void testGetSCMDeletedContainersLimitParam() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(104L), HddsProtos.LifeCycleEvent.CLEANUP); - Set containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 1); reconContainerManager.updateContainerState(ContainerID.valueOf(105L), HddsProtos.LifeCycleEvent.FINALIZE); @@ -1226,9 +1227,7 @@ public void testGetSCMDeletedContainersLimitParam() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(105L), HddsProtos.LifeCycleEvent.CLEANUP); - containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(2, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 2); Response scmDeletedContainers = containerEndpoint.getSCMDeletedContainers(1, 0); @@ -1258,9 +1257,7 @@ public void testGetSCMDeletedContainersPrevKeyParam() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(106L), HddsProtos.LifeCycleEvent.CLEANUP); - Set containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 1); reconContainerManager.updateContainerState(ContainerID.valueOf(107L), HddsProtos.LifeCycleEvent.FINALIZE); @@ -1272,9 +1269,7 @@ public void testGetSCMDeletedContainersPrevKeyParam() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(107L), HddsProtos.LifeCycleEvent.CLEANUP); - containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(2, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 2); Response scmDeletedContainers = containerEndpoint.getSCMDeletedContainers(2, 106L); @@ -1554,16 +1549,12 @@ public void testGetOmContainersDeletedInSCM() throws Exception { reconContainerManager .updateContainerState(ContainerID.valueOf(1), HddsProtos.LifeCycleEvent.DELETE); - Set containerIDs = containerStateManager - 
.getContainerIDs(HddsProtos.LifeCycleState.DELETING); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETING, 1); reconContainerManager .updateContainerState(ContainerID.valueOf(1), HddsProtos.LifeCycleEvent.CLEANUP); - containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 1); List deletedSCMContainers = reconContainerManager.getContainers(HddsProtos.LifeCycleState.DELETED); @@ -1603,9 +1594,7 @@ public void testGetOmContainersDeletedInSCMLimitParam() throws Exception { // and then to DELETED updateContainerStateToDeleted(1); - Set containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(1, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 1); List deletedSCMContainers = reconContainerManager.getContainers(HddsProtos.LifeCycleState.DELETED); @@ -1647,9 +1636,7 @@ public void testGetOmContainersDeletedInSCMPrevContainerParam() updateContainerStateToDeleted(1); updateContainerStateToDeleted(2); - Set containerIDs = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - assertEquals(2, containerIDs.size()); + assertContainerCount(HddsProtos.LifeCycleState.DELETED, 2); List deletedSCMContainers = reconContainerManager.getContainers(HddsProtos.LifeCycleState.DELETED); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java index d7873b25a46..eb09cf3ca70 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java @@ -954,12 +954,12 @@ public void testGetFileCounts() throws Exception { public void testGetContainerCounts() throws Exception { // Mock container info objects with different sizes ContainerInfo omContainerInfo1 = mock(ContainerInfo.class); - given(omContainerInfo1.containerID()).willReturn(new ContainerID(1)); + given(omContainerInfo1.containerID()).willReturn(ContainerID.valueOf(1)); given(omContainerInfo1.getUsedBytes()).willReturn(1500000000L); // 1.5GB given(omContainerInfo1.getState()).willReturn(LifeCycleState.OPEN); ContainerInfo omContainerInfo2 = mock(ContainerInfo.class); - given(omContainerInfo2.containerID()).willReturn(new ContainerID(2)); + given(omContainerInfo2.containerID()).willReturn(ContainerID.valueOf(2)); given(omContainerInfo2.getUsedBytes()).willReturn(2500000000L); // 2.5GB given(omContainerInfo2.getState()).willReturn(LifeCycleState.OPEN); @@ -1437,14 +1437,14 @@ private List getNodeDetails(int n) { private Map> getContainersOnDecomNodes() { Map> containerMap = new HashMap<>(); List underReplicated = new ArrayList<>(); - underReplicated.add(new ContainerID(1L)); - underReplicated.add(new ContainerID(2L)); - underReplicated.add(new ContainerID(3L)); + underReplicated.add(ContainerID.valueOf(1L)); + underReplicated.add(ContainerID.valueOf(2L)); + underReplicated.add(ContainerID.valueOf(3L)); containerMap.put("UnderReplicated", underReplicated); List unclosed = new ArrayList<>(); - unclosed.add(new ContainerID(10L)); - unclosed.add(new ContainerID(11L)); - unclosed.add(new ContainerID(12L)); + unclosed.add(ContainerID.valueOf(10L)); + unclosed.add(ContainerID.valueOf(11L)); + unclosed.add(ContainerID.valueOf(12L)); 
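Aside: these test updates consistently replace the deprecated ContainerID constructor with the ContainerID.valueOf factory. A minimal sketch of the pattern, assuming only the ContainerID API visible in this diff (the wrapper class below is illustrative, not part of the patch):

    import org.apache.hadoop.hdds.scm.container.ContainerID;

    public final class ContainerIdFactoryExample {
      public static void main(String[] args) {
        // Obtain IDs through the static factory instead of "new ContainerID(12L)".
        ContainerID id = ContainerID.valueOf(12L);
        System.out.println("container id = " + id);
      }
    }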
containerMap.put("UnClosed", unclosed); return containerMap; } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java index d5ebf03a8e1..30d786918b4 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java @@ -1352,42 +1352,42 @@ private static ReconStorageContainerManagerFacade getMockReconSCM() ContainerManager containerManager = mock(ContainerManager.class); // Container 1 is 3-way replicated - ContainerID containerID1 = new ContainerID(CONTAINER_ONE_ID); + ContainerID containerID1 = ContainerID.valueOf(CONTAINER_ONE_ID); Set containerReplicas1 = generateMockContainerReplicas( CONTAINER_ONE_REPLICA_COUNT, containerID1); when(containerManager.getContainerReplicas(containerID1)) .thenReturn(containerReplicas1); // Container 2 is under replicated with 2 replica - ContainerID containerID2 = new ContainerID(CONTAINER_TWO_ID); + ContainerID containerID2 = ContainerID.valueOf(CONTAINER_TWO_ID); Set containerReplicas2 = generateMockContainerReplicas( CONTAINER_TWO_REPLICA_COUNT, containerID2); when(containerManager.getContainerReplicas(containerID2)) .thenReturn(containerReplicas2); // Container 3 is over replicated with 4 replica - ContainerID containerID3 = new ContainerID(CONTAINER_THREE_ID); + ContainerID containerID3 = ContainerID.valueOf(CONTAINER_THREE_ID); Set containerReplicas3 = generateMockContainerReplicas( CONTAINER_THREE_REPLICA_COUNT, containerID3); when(containerManager.getContainerReplicas(containerID3)) .thenReturn(containerReplicas3); // Container 4 is replicated with 5 replica - ContainerID containerID4 = new ContainerID(CONTAINER_FOUR_ID); + ContainerID containerID4 = ContainerID.valueOf(CONTAINER_FOUR_ID); Set containerReplicas4 = generateMockContainerReplicas( CONTAINER_FOUR_REPLICA_COUNT, containerID4); when(containerManager.getContainerReplicas(containerID4)) .thenReturn(containerReplicas4); // Container 5 is replicated with 2 replica - ContainerID containerID5 = new ContainerID(CONTAINER_FIVE_ID); + ContainerID containerID5 = ContainerID.valueOf(CONTAINER_FIVE_ID); Set containerReplicas5 = generateMockContainerReplicas( CONTAINER_FIVE_REPLICA_COUNT, containerID5); when(containerManager.getContainerReplicas(containerID5)) .thenReturn(containerReplicas5); // Container 6 is replicated with 3 replica - ContainerID containerID6 = new ContainerID(CONTAINER_SIX_ID); + ContainerID containerID6 = ContainerID.valueOf(CONTAINER_SIX_ID); Set containerReplicas6 = generateMockContainerReplicas( CONTAINER_SIX_REPLICA_COUNT, containerID6); when(containerManager.getContainerReplicas(containerID6)) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java index 1a01e125435..3a37c45b5f3 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java @@ -1277,42 +1277,42 @@ private static ReconStorageContainerManagerFacade getMockReconSCM() ContainerManager containerManager = mock(ContainerManager.class); // Container 1 is 3-way replicated - 
ContainerID containerID1 = new ContainerID(CONTAINER_ONE_ID); + ContainerID containerID1 = ContainerID.valueOf(CONTAINER_ONE_ID); Set containerReplicas1 = generateMockContainerReplicas( CONTAINER_ONE_REPLICA_COUNT, containerID1); when(containerManager.getContainerReplicas(containerID1)) .thenReturn(containerReplicas1); // Container 2 is under replicated with 2 replica - ContainerID containerID2 = new ContainerID(CONTAINER_TWO_ID); + ContainerID containerID2 = ContainerID.valueOf(CONTAINER_TWO_ID); Set containerReplicas2 = generateMockContainerReplicas( CONTAINER_TWO_REPLICA_COUNT, containerID2); when(containerManager.getContainerReplicas(containerID2)) .thenReturn(containerReplicas2); // Container 3 is over replicated with 4 replica - ContainerID containerID3 = new ContainerID(CONTAINER_THREE_ID); + ContainerID containerID3 = ContainerID.valueOf(CONTAINER_THREE_ID); Set containerReplicas3 = generateMockContainerReplicas( CONTAINER_THREE_REPLICA_COUNT, containerID3); when(containerManager.getContainerReplicas(containerID3)) .thenReturn(containerReplicas3); // Container 4 is replicated with 5 replica - ContainerID containerID4 = new ContainerID(CONTAINER_FOUR_ID); + ContainerID containerID4 = ContainerID.valueOf(CONTAINER_FOUR_ID); Set containerReplicas4 = generateMockContainerReplicas( CONTAINER_FOUR_REPLICA_COUNT, containerID4); when(containerManager.getContainerReplicas(containerID4)) .thenReturn(containerReplicas4); // Container 5 is replicated with 2 replica - ContainerID containerID5 = new ContainerID(CONTAINER_FIVE_ID); + ContainerID containerID5 = ContainerID.valueOf(CONTAINER_FIVE_ID); Set containerReplicas5 = generateMockContainerReplicas( CONTAINER_FIVE_REPLICA_COUNT, containerID5); when(containerManager.getContainerReplicas(containerID5)) .thenReturn(containerReplicas5); // Container 6 is replicated with 3 replica - ContainerID containerID6 = new ContainerID(CONTAINER_SIX_ID); + ContainerID containerID6 = ContainerID.valueOf(CONTAINER_SIX_ID); Set containerReplicas6 = generateMockContainerReplicas( CONTAINER_SIX_REPLICA_COUNT, containerID6); when(containerManager.getContainerReplicas(containerID6)) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithOBSAndLegacy.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithOBSAndLegacy.java index 96fb41272f3..a4fa0e220d2 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithOBSAndLegacy.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithOBSAndLegacy.java @@ -1396,42 +1396,42 @@ private static ReconStorageContainerManagerFacade getMockReconSCM() ContainerManager containerManager = mock(ContainerManager.class); // Container 1 is 3-way replicated - ContainerID containerID1 = new ContainerID(CONTAINER_ONE_ID); + ContainerID containerID1 = ContainerID.valueOf(CONTAINER_ONE_ID); Set containerReplicas1 = generateMockContainerReplicas( CONTAINER_ONE_REPLICA_COUNT, containerID1); when(containerManager.getContainerReplicas(containerID1)) .thenReturn(containerReplicas1); // Container 2 is under replicated with 2 replica - ContainerID containerID2 = new ContainerID(CONTAINER_TWO_ID); + ContainerID containerID2 = ContainerID.valueOf(CONTAINER_TWO_ID); Set containerReplicas2 = generateMockContainerReplicas( CONTAINER_TWO_REPLICA_COUNT, containerID2); when(containerManager.getContainerReplicas(containerID2)) .thenReturn(containerReplicas2); // Container 3 
is over replicated with 4 replica - ContainerID containerID3 = new ContainerID(CONTAINER_THREE_ID); + ContainerID containerID3 = ContainerID.valueOf(CONTAINER_THREE_ID); Set containerReplicas3 = generateMockContainerReplicas( CONTAINER_THREE_REPLICA_COUNT, containerID3); when(containerManager.getContainerReplicas(containerID3)) .thenReturn(containerReplicas3); // Container 4 is replicated with 5 replica - ContainerID containerID4 = new ContainerID(CONTAINER_FOUR_ID); + ContainerID containerID4 = ContainerID.valueOf(CONTAINER_FOUR_ID); Set containerReplicas4 = generateMockContainerReplicas( CONTAINER_FOUR_REPLICA_COUNT, containerID4); when(containerManager.getContainerReplicas(containerID4)) .thenReturn(containerReplicas4); // Container 5 is replicated with 2 replica - ContainerID containerID5 = new ContainerID(CONTAINER_FIVE_ID); + ContainerID containerID5 = ContainerID.valueOf(CONTAINER_FIVE_ID); Set containerReplicas5 = generateMockContainerReplicas( CONTAINER_FIVE_REPLICA_COUNT, containerID5); when(containerManager.getContainerReplicas(containerID5)) .thenReturn(containerReplicas5); // Container 6 is replicated with 3 replica - ContainerID containerID6 = new ContainerID(CONTAINER_SIX_ID); + ContainerID containerID6 = ContainerID.valueOf(CONTAINER_SIX_ID); Set containerReplicas6 = generateMockContainerReplicas( CONTAINER_SIX_REPLICA_COUNT, containerID6); when(containerManager.getContainerReplicas(containerID6)) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestOmDBInsightEndPoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestOmDBInsightEndPoint.java index aea63b41000..eb45c6abe84 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestOmDBInsightEndPoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestOmDBInsightEndPoint.java @@ -82,7 +82,7 @@ import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; -import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTask; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTaskOBS; import org.apache.hadoop.ozone.recon.tasks.NSSummaryTaskWithFSO; import org.apache.hadoop.ozone.recon.tasks.NSSummaryTaskWithLegacy; import org.apache.hadoop.ozone.recon.tasks.NSSummaryTaskWithOBS; @@ -387,8 +387,8 @@ private void setUpOmData() throws Exception { when(tableMock.getName()).thenReturn("KeyTable"); when(omMetadataManagerMock.getKeyTable(getBucketLayout())) .thenReturn(tableMock); - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, + ContainerKeyMapperTaskOBS containerKeyMapperTask = + new ContainerKeyMapperTaskOBS(reconContainerMetadataManager, ozoneConfiguration); containerKeyMapperTask.reprocess(reconOMMetadataManager); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java index e484295feb4..9fa74340768 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java @@ -174,7 +174,7 @@ public void testReconNodeDB() throws IOException, NodeNotFoundException { .getOpStateExpiryEpochSeconds()); // Upon 
processing the heartbeat, the illegal command should be filtered out - List returnedCmds = + List> returnedCmds = reconNodeManager.processHeartbeat(datanodeDetails); assertEquals(1, returnedCmds.size()); assertEquals(SCMCommandProto.Type.reregisterCommand, @@ -272,7 +272,7 @@ public void testDatanodeUpdate() throws IOException { datanodeDetails.setHostName("hostname2"); // Upon processing the heartbeat, the illegal command should be filtered out - List returnedCmds = + List> returnedCmds = reconNodeManager.processHeartbeat(datanodeDetails); assertEquals(1, returnedCmds.size()); assertEquals(SCMCommandProto.Type.reregisterCommand, diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerKeyMapperTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerKeyMapperTask.java index 36b335c1b46..fb31537ec7f 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerKeyMapperTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerKeyMapperTask.java @@ -132,10 +132,10 @@ public void testKeyTableReprocess() throws Exception { VOLUME_NAME, Collections.singletonList(omKeyLocationInfoGroup)); - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, + ContainerKeyMapperTaskOBS containerKeyMapperTaskOBS = + new ContainerKeyMapperTaskOBS(reconContainerMetadataManager, omConfiguration); - containerKeyMapperTask.reprocess(reconOMMetadataManager); + containerKeyMapperTaskOBS.reprocess(reconOMMetadataManager); keyPrefixesForContainer = reconContainerMetadataManager.getKeyPrefixesForContainer(1); @@ -205,10 +205,10 @@ public void testFileTableReprocess() throws Exception { KEY_ONE_SIZE); // Reprocess container key mappings - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, + ContainerKeyMapperTaskFSO containerKeyMapperTaskFSO = + new ContainerKeyMapperTaskFSO(reconContainerMetadataManager, omConfiguration); - containerKeyMapperTask.reprocess(reconOMMetadataManager); + containerKeyMapperTaskFSO.reprocess(reconOMMetadataManager); // Check the key prefixes for container 1 keyPrefixesForContainer = @@ -314,10 +314,10 @@ public void testKeyTableProcess() throws IOException { add(keyEvent2); }}, 0L); - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, + ContainerKeyMapperTaskOBS containerKeyMapperTaskOBS = + new ContainerKeyMapperTaskOBS(reconContainerMetadataManager, omConfiguration); - containerKeyMapperTask.reprocess(reconOMMetadataManager); + containerKeyMapperTaskOBS.reprocess(reconOMMetadataManager); keyPrefixesForContainer = reconContainerMetadataManager .getKeyPrefixesForContainer(1); @@ -336,7 +336,7 @@ public void testKeyTableProcess() throws IOException { assertEquals(1, reconContainerMetadataManager.getKeyCountForContainer(3L)); // Process PUT & DELETE event. 
- containerKeyMapperTask.process(omUpdateEventBatch, Collections.emptyMap()); + containerKeyMapperTaskOBS.process(omUpdateEventBatch, Collections.emptyMap()); keyPrefixesForContainer = reconContainerMetadataManager .getKeyPrefixesForContainer(1); @@ -384,8 +384,8 @@ public void testFileTableProcess() throws Exception { new OmKeyLocationInfoGroup(0L, omKeyLocationInfoList); // Reprocess container key mappings - ContainerKeyMapperTask containerKeyMapperTask = - new ContainerKeyMapperTask(reconContainerMetadataManager, + ContainerKeyMapperTaskFSO containerKeyMapperTaskFSO = + new ContainerKeyMapperTaskFSO(reconContainerMetadataManager, omConfiguration); String bucket = BUCKET_NAME; @@ -427,7 +427,7 @@ public void testFileTableProcess() throws Exception { }, 0L); // Process PUT event for both the keys - containerKeyMapperTask.process(omUpdateEventBatch, Collections.emptyMap()); + containerKeyMapperTaskFSO.process(omUpdateEventBatch, Collections.emptyMap()); keyPrefixesForContainer = reconContainerMetadataManager .getKeyPrefixesForContainer(1); @@ -460,7 +460,7 @@ public void testFileTableProcess() throws Exception { }, 0L); // Process DELETE event for key2 - containerKeyMapperTask.process(omUpdateEventBatch2, Collections.emptyMap()); + containerKeyMapperTaskFSO.process(omUpdateEventBatch2, Collections.emptyMap()); keyPrefixesForContainer = reconContainerMetadataManager .getKeyPrefixesForContainer(1); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerSizeCountTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerSizeCountTask.java index 032b948233e..6e444717b29 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerSizeCountTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerSizeCountTask.java @@ -90,18 +90,18 @@ public void setUp() { public void testProcess() { // mock a container with invalid used bytes ContainerInfo omContainerInfo0 = mock(ContainerInfo.class); - given(omContainerInfo0.containerID()).willReturn(new ContainerID(0)); + given(omContainerInfo0.containerID()).willReturn(ContainerID.valueOf(0)); given(omContainerInfo0.getUsedBytes()).willReturn(-1L); given(omContainerInfo0.getState()).willReturn(OPEN); // Write 2 keys ContainerInfo omContainerInfo1 = mock(ContainerInfo.class); - given(omContainerInfo1.containerID()).willReturn(new ContainerID(1)); + given(omContainerInfo1.containerID()).willReturn(ContainerID.valueOf(1)); given(omContainerInfo1.getUsedBytes()).willReturn(1500000000L); // 1.5GB given(omContainerInfo1.getState()).willReturn(CLOSED); ContainerInfo omContainerInfo2 = mock(ContainerInfo.class); - given(omContainerInfo2.containerID()).willReturn(new ContainerID(2)); + given(omContainerInfo2.containerID()).willReturn(ContainerID.valueOf(2)); given(omContainerInfo2.getUsedBytes()).willReturn(2500000000L); // 2.5GB given(omContainerInfo2.getState()).willReturn(CLOSING); @@ -134,13 +134,13 @@ public void testProcess() { // Add a new container ContainerInfo omContainerInfo3 = mock(ContainerInfo.class); - given(omContainerInfo3.containerID()).willReturn(new ContainerID(3)); + given(omContainerInfo3.containerID()).willReturn(ContainerID.valueOf(3)); given(omContainerInfo3.getUsedBytes()).willReturn(1000000000L); // 1GB given(omContainerInfo3.getState()).willReturn(QUASI_CLOSED); containers.add(omContainerInfo3); // Update existing key. 
- given(omContainerInfo2.containerID()).willReturn(new ContainerID(2)); + given(omContainerInfo2.containerID()).willReturn(ContainerID.valueOf(2)); given(omContainerInfo2.getUsedBytes()).willReturn(50000L); // 50KB task.processContainers(containers); @@ -178,23 +178,23 @@ public void testProcess() { public void testProcessDeletedAndNegativeSizedContainers() { // Create a list of containers, including one that is deleted ContainerInfo omContainerInfo1 = mock(ContainerInfo.class); - given(omContainerInfo1.containerID()).willReturn(new ContainerID(1)); + given(omContainerInfo1.containerID()).willReturn(ContainerID.valueOf(1)); given(omContainerInfo1.getUsedBytes()).willReturn(1500000000L); // 1.5GB given(omContainerInfo1.getState()).willReturn(OPEN); ContainerInfo omContainerInfo2 = mock(ContainerInfo.class); - given(omContainerInfo2.containerID()).willReturn(new ContainerID(2)); + given(omContainerInfo2.containerID()).willReturn(ContainerID.valueOf(2)); given(omContainerInfo2.getUsedBytes()).willReturn(2500000000L); // 2.5GB given(omContainerInfo2.getState()).willReturn(CLOSED); ContainerInfo omContainerInfoDeleted = mock(ContainerInfo.class); - given(omContainerInfoDeleted.containerID()).willReturn(new ContainerID(3)); + given(omContainerInfoDeleted.containerID()).willReturn(ContainerID.valueOf(3)); given(omContainerInfoDeleted.getUsedBytes()).willReturn(1000000000L); given(omContainerInfoDeleted.getState()).willReturn(DELETED); // 1GB // Create a mock container with negative size final ContainerInfo negativeSizeContainer = mock(ContainerInfo.class); - given(negativeSizeContainer.containerID()).willReturn(new ContainerID(0)); + given(negativeSizeContainer.containerID()).willReturn(ContainerID.valueOf(0)); given(negativeSizeContainer.getUsedBytes()).willReturn(-1L); given(negativeSizeContainer.getState()).willReturn(OPEN); @@ -202,13 +202,13 @@ public void testProcessDeletedAndNegativeSizedContainers() { final ContainerInfo negativeSizeDeletedContainer = mock(ContainerInfo.class); given(negativeSizeDeletedContainer.containerID()).willReturn( - new ContainerID(0)); + ContainerID.valueOf(0)); given(negativeSizeDeletedContainer.getUsedBytes()).willReturn(-1L); given(negativeSizeDeletedContainer.getState()).willReturn(DELETED); // Create a mock container with id 1 and updated size of 1GB from 1.5GB final ContainerInfo validSizeContainer = mock(ContainerInfo.class); - given(validSizeContainer.containerID()).willReturn(new ContainerID(1)); + given(validSizeContainer.containerID()).willReturn(ContainerID.valueOf(1)); given(validSizeContainer.getUsedBytes()).willReturn(1000000000L); // 1GB given(validSizeContainer.getState()).willReturn(CLOSED); diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java index 01bbba6f0a4..a2cb0c69937 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java @@ -43,10 +43,10 @@ import java.util.stream.Collectors; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.MultivaluedMap; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.ozone.s3.exception.OS3Exception; import org.apache.hadoop.ozone.s3.signature.AWSSignatureProcessor.LowerCaseKeyStringMap; import org.apache.hadoop.ozone.s3.util.S3Utils; -import 
diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java
index 01bbba6f0a4..a2cb0c69937 100644
--- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java
+++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/signature/StringToSignProducer.java
@@ -43,10 +43,10 @@
 import java.util.stream.Collectors;
 import javax.ws.rs.container.ContainerRequestContext;
 import javax.ws.rs.core.MultivaluedMap;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.ozone.s3.exception.OS3Exception;
 import org.apache.hadoop.ozone.s3.signature.AWSSignatureProcessor.LowerCaseKeyStringMap;
 import org.apache.hadoop.ozone.s3.util.S3Utils;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.kerby.util.Hex;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,15 +110,15 @@ public static String createSignatureBase(

     // If the absolute path is empty, use a forward slash (/)
     String uri = signatureInfo.getUnfilteredURI();
-    uri = (!uri.trim().isEmpty()) ? uri : "/";
+    uri = StringUtils.isNotBlank(uri) ? uri : "/";
     // Encode URI and preserve forward slashes
-    strToSign.append(signatureInfo.getAlgorithm() + NEWLINE);
+    strToSign.append(signatureInfo.getAlgorithm()).append(NEWLINE);
     if (signatureInfo.getDateTime() == null) {
       LOG.error("DateTime Header not found.");
       throw S3_AUTHINFO_CREATION_ERROR;
     }
-    strToSign.append(signatureInfo.getDateTime() + NEWLINE);
-    strToSign.append(credentialScope + NEWLINE);
+    strToSign.append(signatureInfo.getDateTime()).append(NEWLINE);
+    strToSign.append(credentialScope).append(NEWLINE);

     String canonicalRequest = buildCanonicalRequest(
         scheme,
@@ -175,7 +175,7 @@ public static String buildCanonicalRequest(

     StringBuilder canonicalHeaders = new StringBuilder();

-    for (String header : StringUtils.getStringCollection(signedHeaders, ";")) {
+    for (String header : StringUtils.split(signedHeaders, ';')) {
       canonicalHeaders.append(header.toLowerCase());
       canonicalHeaders.append(":");
       if (headers.containsKey(header)) {
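The StringToSignProducer change does two things: it swaps Hadoop's StringUtils for commons-lang3 (isNotBlank for the empty-URI default, split(String, char) for the signed-header list, which also drops empty tokens), and it replaces string concatenation inside append() with chained append() calls. The class below is a hypothetical, self-contained sketch of that style only; it does not reproduce the real signature computation.

import org.apache.commons.lang3.StringUtils;

public final class StringToSignSketch {

  private static final String NEWLINE = "\n";

  private StringToSignSketch() {
  }

  // Chained append() calls avoid building an intermediate String per line,
  // and StringUtils.isNotBlank() covers null, empty and whitespace-only URIs.
  static String signatureBase(String algorithm, String dateTime,
      String credentialScope, String uri) {
    String normalizedUri = StringUtils.isNotBlank(uri) ? uri : "/";
    return new StringBuilder()
        .append(algorithm).append(NEWLINE)
        .append(dateTime).append(NEWLINE)
        .append(credentialScope).append(NEWLINE)
        .append(normalizedUri)
        .toString();
  }

  public static void main(String[] args) {
    // commons-lang3 split() drops empty tokens, so a trailing ';' is harmless.
    for (String header : StringUtils.split("host;x-amz-date;", ';')) {
      System.out.println(header);
    }
    System.out.println(signatureBase("AWS4-HMAC-SHA256", "20240101T000000Z",
        "20240101/us-east-1/s3/aws4_request", "   "));
  }
}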
diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestObjectGet.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestObjectGet.java
index 3e3281fa47f..9bf5f27ddd4 100644
--- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestObjectGet.java
+++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestObjectGet.java
@@ -117,7 +117,7 @@ public void get() throws IOException, OS3Exception {
         IOUtils.toString(ozoneInputStream, UTF_8);

     assertEquals(CONTENT, keyContent);
-    assertEquals("" + keyContent.length(),
+    assertEquals(String.valueOf(keyContent.length()),
         response.getHeaderString("Content-Length"));

     DateTimeFormatter.RFC_1123_DATE_TIME
@@ -139,7 +139,7 @@ public void getKeyWithTag() throws IOException, OS3Exception {
         IOUtils.toString(ozoneInputStream, UTF_8);

     assertEquals(CONTENT, keyContent);
-    assertEquals("" + keyContent.length(),
+    assertEquals(String.valueOf(keyContent.length()),
         response.getHeaderString("Content-Length"));

     DateTimeFormatter.RFC_1123_DATE_TIME
diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/signature/TestAuthorizationV4HeaderParser.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/signature/TestAuthorizationV4HeaderParser.java
index 0908167f533..8812405d10d 100644
--- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/signature/TestAuthorizationV4HeaderParser.java
+++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/signature/TestAuthorizationV4HeaderParser.java
@@ -320,7 +320,6 @@ public void testV4HeaderSignatureValidationFailure() throws Exception {
         "AWS4-HMAC-SHA256 Credential=ozone/" + curDate + "/us-east-1/s3"
             + "/aws4_request,"
             + "SignedHeaders=host;x-amz-content-sha256;x-amz-date,"
-            + ""
             + "=";
     assertThrows(MalformedResourceException.class,
         () -> new AuthorizationV4HeaderParser(auth3, SAMPLE_DATE)
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FinalizeUpgradeSubCommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FinalizeUpgradeSubCommand.java
index 3a6f0fc3257..45dbaf55e4a 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FinalizeUpgradeSubCommand.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FinalizeUpgradeSubCommand.java
@@ -17,15 +17,15 @@

 package org.apache.hadoop.ozone.admin.om;

-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitCancellationMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitExitMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitFinishedMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitGeneralErrorMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.handleInvalidRequestAfterInitiatingFinalization;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isFinalized;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isInprogress;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitCancellationMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitExitMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitFinishedMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitGeneralErrorMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.handleInvalidRequestAfterInitiatingFinalization;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isFinalized;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isInprogress;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isStarting;

 import java.io.IOException;
 import java.util.UUID;
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeScmUpgradeSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeScmUpgradeSubcommand.java
index 63f928e0998..4b123c943c1 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeScmUpgradeSubcommand.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeScmUpgradeSubcommand.java
@@ -17,15 +17,15 @@

 package org.apache.hadoop.ozone.admin.scm;

-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitCancellationMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitExitMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitFinishedMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.emitGeneralErrorMsg;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.handleInvalidRequestAfterInitiatingFinalization;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isFinalized;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isInprogress;
-import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitCancellationMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitExitMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitFinishedMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.emitGeneralErrorMsg;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.handleInvalidRequestAfterInitiatingFinalization;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isFinalized;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isInprogress;
+import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isStarting;

 import java.io.IOException;
 import java.util.UUID;
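Both finalize subcommands now import these helpers from org.apache.hadoop.ozone.upgrade.UpgradeFinalization instead of the tools-local FinalizeUpgradeCommandUtil deleted below. A small sketch of how the relocated status predicates can be combined, assuming they keep the signatures of the deleted methods; the wrapper class and its method are hypothetical.

import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isDone;
import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isFinalized;
import static org.apache.hadoop.ozone.upgrade.UpgradeFinalization.isInprogress;

import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;

public final class FinalizationStatusSketch {

  private FinalizationStatusSketch() {
  }

  // True once finalization has nothing left to do, whether it just finished
  // or the component was already finalized before the command ran.
  static boolean nothingLeftToDo(UpgradeFinalization.Status status) {
    return isFinalized(status) || isDone(status);
  }

  public static void main(String[] args) {
    UpgradeFinalization.Status status =
        UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS;
    System.out.println(isInprogress(status));    // true
    System.out.println(nothingLeftToDo(status)); // false
  }
}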
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeUpgradeCommandUtil.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeUpgradeCommandUtil.java
deleted file mode 100644
index f46cf83496c..00000000000
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/scm/FinalizeUpgradeCommandUtil.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.admin.scm;
-
-import static org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.INVALID_REQUEST;
-
-import java.io.IOException;
-import org.apache.hadoop.ozone.upgrade.UpgradeException;
-import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
-
-/**
- * Base class to help with Upgrade finalization command.
- */
-
-public final class FinalizeUpgradeCommandUtil {
-
-  private FinalizeUpgradeCommandUtil() {
-
-  }
-
-  public static void handleInvalidRequestAfterInitiatingFinalization(
-      boolean force, UpgradeException e) throws IOException {
-    if (e.getResult().equals(INVALID_REQUEST)) {
-      if (force) {
-        return;
-      }
-      System.err.println("Finalization is already in progress, it is not"
-          + "possible to initiate it again.");
-      e.printStackTrace(System.err);
-      System.err.println("If you want to track progress from a new client"
-          + "for any reason, use --takeover, and the status update will be"
-          + "received by the new client. Note that with forcing to monitor"
-          + "progress from a new client, the old one initiated the upgrade"
-          + "will not be able to monitor the progress further and exit.");
-      throw new IOException("Exiting...");
-    } else {
-      throw e;
-    }
-  }
-
-  public static void emitExitMsg() {
-    System.out.println("Exiting...");
-  }
-
-  public static boolean isFinalized(UpgradeFinalization.Status status) {
-    return status.equals(UpgradeFinalization.Status.ALREADY_FINALIZED);
-  }
-
-  public static boolean isDone(UpgradeFinalization.Status status) {
-    return status.equals(UpgradeFinalization.Status.FINALIZATION_DONE);
-  }
-
-  public static boolean isInprogress(UpgradeFinalization.Status status) {
-    return status.equals(UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS);
-  }
-
-  public static boolean isStarting(UpgradeFinalization.Status status) {
-    return status.equals(UpgradeFinalization.Status.STARTING_FINALIZATION);
-  }
-
-  public static void emitGeneralErrorMsg() {
-    System.err.println("Finalization was not successful.");
-  }
-
-  public static void emitFinishedMsg(String component) {
-    System.out.println("Finalization of " + component + "'s metadata upgrade "
-        + "finished.");
-  }
-
-  public static void emitCancellationMsg(String component) {
-    System.err.println("Finalization command was cancelled. Note that, this"
-        + "will not cancel finalization in " + component + ". Progress can be"
-        + "monitored in the Ozone Manager's log.");
-  }
-}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/audit/parser/AuditParser.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/audit/parser/AuditParser.java
similarity index 78%
rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/audit/parser/AuditParser.java
rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/audit/parser/AuditParser.java
index ccfadba2819..067c5e54913 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/audit/parser/AuditParser.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/audit/parser/AuditParser.java
@@ -15,20 +15,21 @@
  * limitations under the License.
  */

-package org.apache.hadoop.ozone.audit.parser;
+package org.apache.hadoop.ozone.debug.audit.parser;

-import org.apache.hadoop.hdds.cli.GenericCli;
+import org.apache.hadoop.hdds.cli.DebugSubcommand;
 import org.apache.hadoop.hdds.cli.HddsVersionProvider;
-import org.apache.hadoop.ozone.audit.parser.handler.LoadCommandHandler;
-import org.apache.hadoop.ozone.audit.parser.handler.QueryCommandHandler;
-import org.apache.hadoop.ozone.audit.parser.handler.TemplateCommandHandler;
+import org.apache.hadoop.ozone.debug.audit.parser.handler.LoadCommandHandler;
+import org.apache.hadoop.ozone.debug.audit.parser.handler.QueryCommandHandler;
+import org.apache.hadoop.ozone.debug.audit.parser.handler.TemplateCommandHandler;
+import org.kohsuke.MetaInfServices;
 import picocli.CommandLine.Command;
 import picocli.CommandLine.Parameters;

 /**
  * Ozone audit parser tool.
  */
-@Command(name = "ozone auditparser",
+@Command(name = "auditparser",
     description = "Shell parser for Ozone Audit Logs",
     subcommands = {
         LoadCommandHandler.class,
@@ -37,7 +38,8 @@
     },
     versionProvider = HddsVersionProvider.class,
     mixinStandardHelpOptions = true)
-public class AuditParser extends GenericCli {
+@MetaInfServices(DebugSubcommand.class)
+public class AuditParser implements DebugSubcommand {
   /* <.db file path> load <.db file path> template