Skip to content

HDDS-12776. ozone debug CLI command to list all Duplicate open containers #8409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
subcommands = {
ContainerInfoCommand.class,
ContainerLogParser.class,
DuplicateOpenContainersCommand.class,
ListContainers.class
},
description = "Tool to parse and store container logs from datanodes into a temporary SQLite database." +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ public Void call() throws Exception {
parser.processLogEntries(path, cdd, threadCount);

cdd.insertLatestContainerLogData();
cdd.createIndexes();
out().println("Successfully parsed the log files and updated the respective tables");

return null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.debug.logs.container;

import java.nio.file.Path;
import java.util.concurrent.Callable;
import org.apache.hadoop.hdds.cli.AbstractSubcommand;
import org.apache.hadoop.ozone.debug.logs.container.utils.ContainerDatanodeDatabase;
import picocli.CommandLine;

/**
 * Subcommand to list containers that have duplicate OPEN states.
 *
 * <p>The database location is obtained from the parent {@link ContainerLogController}; the lookup and
 * the printing of results are delegated to {@link ContainerDatanodeDatabase#findDuplicateOpenContainer()}.
 */
@CommandLine.Command(
    name = "duplicate-open",
    // The trailing space keeps the two sentences apart in the rendered help text.
    description = "List all containers which have duplicate open states. " +
        "Outputs the container ID along with the count of OPEN state entries."
)
public class DuplicateOpenContainersCommand extends AbstractSubcommand implements Callable<Void> {

  @CommandLine.ParentCommand
  private ContainerLogController parent;

  /**
   * Resolves the database path and runs the duplicate-OPEN check against it.
   *
   * @return always {@code null}
   * @throws Exception if resolving the path or querying the database fails
   */
  @Override
  public Void call() throws Exception {
    Path dbPath = parent.resolveDbPath();
    if (dbPath == null) {
      // Nothing to query without a database path.
      // NOTE(review): presumably resolveDbPath() has already reported why - confirm.
      return null;
    }

    ContainerDatanodeDatabase cdd = new ContainerDatanodeDatabase(dbPath.toString());
    cdd.findDuplicateOpenContainer();

    return null;
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
description = "Finds containers from the database based on the option provided."
)
public class ListContainers extends AbstractSubcommand implements Callable<Void> {

@CommandLine.Option(names = {"--state"},
description = "Life cycle state of the container.",
required = true)
Expand All @@ -58,7 +58,6 @@ public Void call() throws Exception {
ContainerDatanodeDatabase cdd = new ContainerDatanodeDatabase(dbPath.toString());

cdd.listContainersByState(state.name(), listOptions.getLimit());

return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ private void createContainerLogTable() throws SQLException {
}
}

/**
 * Creates the database indexes using a single connection and statement.
 *
 * <p>Runs {@code createIdxDclContainerStateTime}, {@code createContainerLogIndex} and
 * {@code createIdxContainerlogContainerId} in that order.
 *
 * @throws SQLException if any index statement fails; the original exception is attached as the cause
 * @throws RuntimeException if any other exception occurs; the original exception is attached as the cause
 */
public void createIndexes() throws SQLException {
  try (Connection connection = getConnection();
       Statement stmt = connection.createStatement()) {
    createIdxDclContainerStateTime(stmt);
    createContainerLogIndex(stmt);
    createIdxContainerlogContainerId(stmt);
  } catch (SQLException e) {
    // Keep the cause so the driver's stack trace and SQL state are not lost.
    throw new SQLException("Error while creating index: " + e.getMessage(), e);
  } catch (Exception e) {
    throw new RuntimeException("Unexpected error: " + e, e);
  }
}

/**
* Inserts a list of container log entries into the DatanodeContainerLogTable.
*
Expand Down Expand Up @@ -255,10 +268,7 @@ public void listContainersByState(String state, Integer limit) throws SQLExcepti
String baseQuery = SQLDBConstants.SELECT_LATEST_CONTAINER_LOGS_BY_STATE;
String finalQuery = limitProvided ? baseQuery + " LIMIT ?" : baseQuery;

try (Connection connection = getConnection();
Statement stmt = connection.createStatement()) {

createContainerLogIndex(stmt);
try (Connection connection = getConnection()) {

try (PreparedStatement pstmt = connection.prepareStatement(finalQuery)) {
pstmt.setString(1, state);
Expand Down Expand Up @@ -304,11 +314,9 @@ public void listContainersByState(String state, Integer limit) throws SQLExcepti
}
}

private void createIdxDclContainerStateTime(Connection conn) throws SQLException {
String sql = SQLDBConstants.CREATE_DCL_CONTAINER_STATE_TIME_INDEX;
try (Statement stmt = conn.createStatement()) {
stmt.execute(sql);
}
/**
 * Executes {@code SQLDBConstants.CREATE_DCL_CONTAINER_STATE_TIME_INDEX} on the supplied statement.
 *
 * @param stmt statement used to run the index DDL
 * @throws SQLException if executing the statement fails
 */
private void createIdxDclContainerStateTime(Statement stmt) throws SQLException {
  stmt.execute(SQLDBConstants.CREATE_DCL_CONTAINER_STATE_TIME_INDEX);
}

/**
Expand All @@ -323,7 +331,7 @@ private void createIdxDclContainerStateTime(Connection conn) throws SQLException
public void showContainerDetails(Long containerID) throws SQLException {

try (Connection connection = getConnection()) {
createIdxDclContainerStateTime(connection);

List<DatanodeContainerInfo> logEntries = getContainerLogData(containerID, connection);

if (logEntries.isEmpty()) {
Expand Down Expand Up @@ -541,5 +549,65 @@ private List<DatanodeContainerInfo> getContainerLogData(Long containerID, Connec

return logEntries;
}

/**
 * Executes {@code SQLDBConstants.CREATE_CONTAINER_ID_INDEX} on the supplied statement.
 *
 * @param stmt statement used to run the index DDL
 * @throws SQLException if executing the statement fails
 */
private void createIdxContainerlogContainerId(Statement stmt) throws SQLException {
  stmt.execute(SQLDBConstants.CREATE_CONTAINER_ID_INDEX);
}

/**
 * Prints every container reported as having multiple OPEN state entries, followed by a total count.
 *
 * <p>Iterates over the container IDs returned by {@code SQLDBConstants.SELECT_DISTINCT_CONTAINER_IDS_QUERY},
 * loads the OPEN-state log entries of each container over the same connection, and reports the container
 * when {@code checkForMultipleOpenStates} flags those entries.
 *
 * @throws SQLException if a query fails; the original exception is attached as the cause
 * @throws RuntimeException if any other exception occurs; the original exception is attached as the cause
 */
public void findDuplicateOpenContainer() throws SQLException {
  String sql = SQLDBConstants.SELECT_DISTINCT_CONTAINER_IDS_QUERY;

  try (Connection connection = getConnection();
       PreparedStatement statement = connection.prepareStatement(sql);
       ResultSet resultSet = statement.executeQuery()) {
    int count = 0;

    while (resultSet.next()) {
      Long containerID = resultSet.getLong("container_id");
      // One lookup per container, reusing the already open connection.
      List<DatanodeContainerInfo> logEntries = getContainerLogDataForOpenContainers(containerID, connection);
      if (checkForMultipleOpenStates(logEntries)) {
        long openStateCount = logEntries.stream()
            .filter(entry -> "OPEN".equalsIgnoreCase(entry.getState()))
            .count();
        count++;
        out.println("Container ID: " + containerID + " - OPEN state count: " + openStateCount);
      }
    }

    out.println("Total containers that might have duplicate OPEN state : " + count);
  } catch (SQLException e) {
    throw new SQLException("Error while retrieving containers. " + e.getMessage(), e);
  } catch (Exception e) {
    // Keep the cause so the original stack trace survives the rethrow.
    throw new RuntimeException("Unexpected error: " + e, e);
  }
}

/**
 * Reads the rows returned by {@code SQLDBConstants.SELECT_CONTAINER_DETAILS_OPEN_STATE} for one container.
 *
 * @param containerID container ID bound to the query's single parameter
 * @param connection open connection used to prepare and run the query
 * @return one {@code DatanodeContainerInfo} per returned row, in result-set order; empty if there are none
 * @throws SQLException if preparing or executing the query fails
 */
private List<DatanodeContainerInfo> getContainerLogDataForOpenContainers(Long containerID, Connection connection)
    throws SQLException {
  List<DatanodeContainerInfo> openEntries = new ArrayList<>();

  try (PreparedStatement pstmt =
           connection.prepareStatement(SQLDBConstants.SELECT_CONTAINER_DETAILS_OPEN_STATE)) {
    pstmt.setLong(1, containerID);

    try (ResultSet rows = pstmt.executeQuery()) {
      while (rows.next()) {
        // Pull the columns out first, then assemble the entry.
        String timestamp = rows.getString("timestamp");
        long id = rows.getLong("container_id");
        String datanodeId = rows.getString("datanode_id");
        String state = rows.getString("container_state");

        openEntries.add(new DatanodeContainerInfo.Builder()
            .setTimestamp(timestamp)
            .setContainerId(id)
            .setDatanodeId(datanodeId)
            .setState(state)
            .build());
      }
    }
  }

  return openEntries;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ public final class SQLDBConstants {
"WHERE d.container_id = ? ORDER BY d.datanode_id ASC, d.timestamp ASC;";
/**
 * DDL creating index {@code idx_dcl_container_state_time} on
 * {@code DatanodeContainerLogTable(container_id, container_state, timestamp)} if it does not already exist.
 */
public static final String CREATE_DCL_CONTAINER_STATE_TIME_INDEX = "CREATE INDEX IF NOT EXISTS " +
"idx_dcl_container_state_time ON DatanodeContainerLogTable(container_id, container_state, timestamp);";
/**
 * DDL creating index {@code idx_containerlog_container_id} on {@code ContainerLogTable(container_id)}
 * if it does not already exist.
 */
public static final String CREATE_CONTAINER_ID_INDEX = "CREATE INDEX IF NOT EXISTS idx_containerlog_container_id " +
"ON ContainerLogTable(container_id);";
/**
 * Query returning each distinct {@code container_id} present in {@code ContainerLogTable}.
 */
public static final String SELECT_DISTINCT_CONTAINER_IDS_QUERY =
"SELECT DISTINCT container_id FROM ContainerLogTable";
/**
 * Query returning timestamp, container ID, datanode ID and state of the {@code DatanodeContainerLogTable}
 * rows whose state is {@code 'OPEN'} for one container, ordered by ascending timestamp.
 * Takes a single positional parameter: the container ID.
 */
public static final String SELECT_CONTAINER_DETAILS_OPEN_STATE = "SELECT d.timestamp, d.container_id, " +
"d.datanode_id, d.container_state FROM DatanodeContainerLogTable d " +
"WHERE d.container_id = ? AND d.container_state = 'OPEN' ORDER BY d.timestamp ASC;";

private SQLDBConstants() {
//Never constructed
Expand Down