diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
index d72e010535d9..e75e1a5c5b32 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
@@ -758,7 +758,7 @@ public Builder setDatanodeDetails(DatanodeDetails details) {
this.id = details.id;
this.ipAddress = details.getIpAddressAsByteString();
this.hostName = details.getHostNameAsByteString();
- this.networkName = details.getHostNameAsByteString();
+ this.networkName = details.getNetworkNameAsByteString();
this.networkLocation = details.getNetworkLocationAsByteString();
this.level = details.getLevel();
this.ports = details.getPorts();
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 6db82dfb51fc..2f2c15aff937 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -4422,6 +4422,16 @@
+
+ ozone.om.container.location.datanode.cache.size
+ 10000
+ OZONE, OM
+
+ The maximum number of entries in the datanode details cache, which deduplicates
+ datanode objects retained by the container location cache in Ozone Manager.
+
+
+
ozone.om.container.location.cache.ttl
360m
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index 05083d506304..2e5be1036548 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -547,6 +547,11 @@ public final class OMConfigKeys {
public static final int OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT
= 100_000;
+ public static final String OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE
+ = "ozone.om.container.location.datanode.cache.size";
+ public static final int
+ OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT = 10_000;
+
public static final String OZONE_OM_CONTAINER_LOCATION_CACHE_TTL
= "ozone.om.container.location.cache.ttl";
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java
index 0bdcce56e5c2..868a248e909e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java
@@ -21,13 +21,17 @@
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT;
+import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.CacheLoader.InvalidCacheLoadException;
import com.google.common.cache.LoadingCache;
import jakarta.annotation.Nonnull;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -37,7 +41,8 @@
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.DatanodeID;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
@@ -52,21 +57,28 @@ public class ScmClient {
private final StorageContainerLocationProtocol containerClient;
private final LoadingCache containerLocationCache;
private final CacheMetrics containerCacheMetrics;
+ private final CacheMetrics datanodeDetailsCacheMetrics;
ScmClient(ScmBlockLocationProtocol blockClient,
StorageContainerLocationProtocol containerClient,
OzoneConfiguration configuration) {
this.containerClient = containerClient;
this.blockClient = blockClient;
+ Cache datanodeDetailsCache =
+ createDatanodeDetailsCache(configuration);
this.containerLocationCache =
- createContainerLocationCache(configuration, containerClient);
+ createContainerLocationCache(configuration, containerClient,
+ datanodeDetailsCache);
this.containerCacheMetrics = CacheMetrics.create(containerLocationCache,
"ContainerInfo");
+ this.datanodeDetailsCacheMetrics = CacheMetrics.create(
+ datanodeDetailsCache, "DatanodeDetails");
}
static LoadingCache createContainerLocationCache(
OzoneConfiguration configuration,
- StorageContainerLocationProtocol containerClient) {
+ StorageContainerLocationProtocol containerClient,
+ Cache datanodeDetailsCache) {
int maxSize = configuration.getInt(OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE,
OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT);
TimeUnit unit = OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT.getUnit();
@@ -81,7 +93,9 @@ static LoadingCache createContainerLocationCache(
@Nonnull
@Override
public Pipeline load(@Nonnull Long key) throws Exception {
- return containerClient.getContainerWithPipeline(key).getPipeline();
+ Pipeline pipeline =
+ containerClient.getContainerWithPipeline(key).getPipeline();
+ return newPipelineWithDNCache(pipeline, datanodeDetailsCache);
}
@Nonnull
@@ -92,12 +106,44 @@ public Map loadAll(
.stream()
.collect(Collectors.toMap(
x -> x.getContainerInfo().getContainerID(),
- ContainerWithPipeline::getPipeline
+ x -> newPipelineWithDNCache(x.getPipeline(),
+ datanodeDetailsCache)
));
}
});
}
+ static Cache createDatanodeDetailsCache(
+ OzoneConfiguration configuration) {
+ int datanodeCacheMaxSize = configuration.getInt(
+ OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE,
+ OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT);
+
+ return CacheBuilder.newBuilder()
+ .maximumSize(datanodeCacheMaxSize)
+ .recordStats()
+ .build();
+ }
+
+ static Pipeline newPipelineWithDNCache(Pipeline pipeline,
+ Cache datanodeDetailsCache) {
+ Pipeline.Builder builder = pipeline.toBuilder();
+ List nodes = new ArrayList<>();
+ for (DatanodeDetails node : pipeline.getNodes()) {
+ DatanodeDetails datanodeDetails =
+ datanodeDetailsCache.getIfPresent(node.getID());
+ // Call compareNodeValues to handle IP / hostname changes
+ if (datanodeDetails != null && node.compareNodeValues(datanodeDetails)) {
+ nodes.add(datanodeDetails);
+ } else {
+ datanodeDetailsCache.put(node.getID(), node);
+ nodes.add(node);
+ }
+ }
+ builder.setNodes(nodes);
+ return builder.build();
+ }
+
public ScmBlockLocationProtocol getBlockClient() {
return this.blockClient;
}
@@ -166,6 +212,7 @@ private T handleCacheExecutionException(ExecutionException e)
public void close() {
containerCacheMetrics.unregister();
+ datanodeDetailsCacheMetrics.unregister();
}
}
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java
index d055ac246c17..89d8c2162c38 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java
@@ -21,6 +21,7 @@
import static java.util.Arrays.asList;
import static org.apache.hadoop.hdds.client.ReplicationConfig.fromTypeAndFactor;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.mock;
@@ -28,6 +29,8 @@
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -41,6 +44,7 @@
import org.apache.hadoop.hdds.client.ReplicationType;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.DatanodeID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@@ -106,9 +110,12 @@ public void testGetContainerLocations(String testCaseName,
throws IOException {
Map actualLocations = new HashMap<>();
-
+ List dnList = new ArrayList<>();
+ for (int i = 0; i < 3; i++) {
+ dnList.add(randomDatanode());
+ }
for (long containerId : prepopulatedIds) {
- ContainerWithPipeline pipeline = createPipeline(containerId);
+ ContainerWithPipeline pipeline = createPipeline(containerId, dnList);
actualLocations.put(containerId, pipeline);
}
@@ -128,7 +135,7 @@ public void testGetContainerLocations(String testCaseName,
if (!expectedScmCallIds.isEmpty()) {
List scmLocations = new ArrayList<>();
for (long containerId : expectedScmCallIds) {
- ContainerWithPipeline pipeline = createPipeline(containerId);
+ ContainerWithPipeline pipeline = createPipeline(containerId, dnList);
scmLocations.add(pipeline);
actualLocations.put(containerId, pipeline);
}
@@ -166,13 +173,51 @@ public void testGetContainerLocationsWithScmFailures() throws IOException {
assertEquals(runtimeException, actualRt.getCause());
}
- ContainerWithPipeline createPipeline(long containerId) {
+ @Test
+ public void testDatanodeDetailsCacheRecordsStats() {
+ Cache datanodeDetailsCache =
+ ScmClient.createDatanodeDetailsCache(new OzoneConfiguration());
+
+ datanodeDetailsCache.getIfPresent(DatanodeID.randomID());
+
+ assertEquals(1, datanodeDetailsCache.stats().missCount());
+ }
+
+ @Test
+ public void testDatanodeDetailsCacheUpdatesIpAddressChange() {
+ Cache datanodeDetailsCache =
+ CacheBuilder.newBuilder().build();
+ DatanodeDetails original = randomDatanode();
+ String originalIp = original.getIpAddress();
+ DatanodeDetails updated = DatanodeDetails.newBuilder()
+ .setDatanodeDetails(original)
+ .setIpAddress("updated-ip")
+ .build();
+
+ Pipeline originalPipeline = ScmClient.newPipelineWithDNCache(
+ createPipeline(1L, asList(original)).getPipeline(),
+ datanodeDetailsCache);
+ Pipeline refreshed = ScmClient.newPipelineWithDNCache(
+ createPipeline(2L, asList(updated)).getPipeline(),
+ datanodeDetailsCache);
+
+ assertSame(original, originalPipeline.getNodes().get(0));
+ assertEquals(originalIp, original.getIpAddress());
+ assertEquals(originalIp, originalPipeline.getNodes().get(0).getIpAddress());
+ DatanodeDetails refreshedNode = refreshed.getNodes().get(0);
+ assertSame(updated, refreshedNode);
+ assertEquals("updated-ip", refreshedNode.getIpAddress());
+ assertSame(updated, datanodeDetailsCache.getIfPresent(original.getID()));
+ }
+
+ ContainerWithPipeline createPipeline(long containerId,
+ List dnList) {
ContainerInfo containerInfo = new ContainerInfo.Builder()
.setContainerID(containerId)
.build();
Pipeline pipeline = Pipeline.newBuilder()
.setId(PipelineID.randomId())
- .setNodes(asList(randomDatanode(), randomDatanode()))
+ .setNodes(dnList)
.setReplicationConfig(fromTypeAndFactor(
ReplicationType.RATIS, ReplicationFactor.THREE))
.setState(Pipeline.PipelineState.OPEN)