diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index d72e010535d9..e75e1a5c5b32 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -758,7 +758,7 @@ public Builder setDatanodeDetails(DatanodeDetails details) { this.id = details.id; this.ipAddress = details.getIpAddressAsByteString(); this.hostName = details.getHostNameAsByteString(); - this.networkName = details.getHostNameAsByteString(); + this.networkName = details.getNetworkNameAsByteString(); this.networkLocation = details.getNetworkLocationAsByteString(); this.level = details.getLevel(); this.ports = details.getPorts(); diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 6db82dfb51fc..2f2c15aff937 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -4422,6 +4422,16 @@ + + ozone.om.container.location.datanode.cache.size + 10000 + OZONE, OM + + The size of the datanode details cache used to reduce duplicate datanode objects + retained by the container location cache in Ozone Manager. + + + ozone.om.container.location.cache.ttl 360m diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 05083d506304..2e5be1036548 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -547,6 +547,11 @@ public final class OMConfigKeys { public static final int OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT = 100_000; + public static final String OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE + = "ozone.om.container.location.datanode.cache.size"; + public static final int + OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT = 10_000; + public static final String OZONE_OM_CONTAINER_LOCATION_CACHE_TTL = "ozone.om.container.location.cache.ttl"; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java index 0bdcce56e5c2..868a248e909e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java @@ -21,13 +21,17 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT; +import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.CacheLoader.InvalidCacheLoadException; import com.google.common.cache.LoadingCache; import jakarta.annotation.Nonnull; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -37,7 +41,8 @@ import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; @@ -52,21 +57,28 @@ public class ScmClient { private final StorageContainerLocationProtocol containerClient; private final LoadingCache containerLocationCache; private final CacheMetrics containerCacheMetrics; + private final CacheMetrics datanodeDetailsCacheMetrics; ScmClient(ScmBlockLocationProtocol blockClient, StorageContainerLocationProtocol containerClient, OzoneConfiguration configuration) { this.containerClient = containerClient; this.blockClient = blockClient; + Cache datanodeDetailsCache = + createDatanodeDetailsCache(configuration); this.containerLocationCache = - createContainerLocationCache(configuration, containerClient); + createContainerLocationCache(configuration, containerClient, + datanodeDetailsCache); this.containerCacheMetrics = CacheMetrics.create(containerLocationCache, "ContainerInfo"); + this.datanodeDetailsCacheMetrics = CacheMetrics.create( + datanodeDetailsCache, "DatanodeDetails"); } static LoadingCache createContainerLocationCache( OzoneConfiguration configuration, - StorageContainerLocationProtocol containerClient) { + StorageContainerLocationProtocol containerClient, + Cache datanodeDetailsCache) { int maxSize = configuration.getInt(OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE, OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT); TimeUnit unit = OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT.getUnit(); @@ -81,7 +93,9 @@ static LoadingCache createContainerLocationCache( @Nonnull @Override public Pipeline load(@Nonnull Long key) throws Exception { - return containerClient.getContainerWithPipeline(key).getPipeline(); + Pipeline pipeline = + containerClient.getContainerWithPipeline(key).getPipeline(); + return newPipelineWithDNCache(pipeline, datanodeDetailsCache); } @Nonnull @@ -92,12 +106,44 @@ public Map loadAll( .stream() .collect(Collectors.toMap( x -> x.getContainerInfo().getContainerID(), - ContainerWithPipeline::getPipeline + x -> newPipelineWithDNCache(x.getPipeline(), + datanodeDetailsCache) )); } }); } + static Cache createDatanodeDetailsCache( + OzoneConfiguration configuration) { + int datanodeCacheMaxSize = configuration.getInt( + OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE, + OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT); + + return CacheBuilder.newBuilder() + .maximumSize(datanodeCacheMaxSize) + .recordStats() + .build(); + } + + static Pipeline newPipelineWithDNCache(Pipeline pipeline, + Cache datanodeDetailsCache) { + Pipeline.Builder builder = pipeline.toBuilder(); + List nodes = new ArrayList<>(); + for (DatanodeDetails node : pipeline.getNodes()) { + DatanodeDetails datanodeDetails = + datanodeDetailsCache.getIfPresent(node.getID()); + // Call compareNodeValues to handle IP / hostname changes + if (datanodeDetails != null && node.compareNodeValues(datanodeDetails)) { + nodes.add(datanodeDetails); + } else { + datanodeDetailsCache.put(node.getID(), node); + nodes.add(node); + } + } + builder.setNodes(nodes); + return builder.build(); + } + public ScmBlockLocationProtocol getBlockClient() { return this.blockClient; } @@ -166,6 +212,7 @@ private T handleCacheExecutionException(ExecutionException e) public void close() { containerCacheMetrics.unregister(); + datanodeDetailsCacheMetrics.unregister(); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java index d055ac246c17..89d8c2162c38 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java @@ -21,6 +21,7 @@ import static java.util.Arrays.asList; import static org.apache.hadoop.hdds.client.ReplicationConfig.fromTypeAndFactor; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; @@ -28,6 +29,8 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -41,6 +44,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -106,9 +110,12 @@ public void testGetContainerLocations(String testCaseName, throws IOException { Map actualLocations = new HashMap<>(); - + List dnList = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + dnList.add(randomDatanode()); + } for (long containerId : prepopulatedIds) { - ContainerWithPipeline pipeline = createPipeline(containerId); + ContainerWithPipeline pipeline = createPipeline(containerId, dnList); actualLocations.put(containerId, pipeline); } @@ -128,7 +135,7 @@ public void testGetContainerLocations(String testCaseName, if (!expectedScmCallIds.isEmpty()) { List scmLocations = new ArrayList<>(); for (long containerId : expectedScmCallIds) { - ContainerWithPipeline pipeline = createPipeline(containerId); + ContainerWithPipeline pipeline = createPipeline(containerId, dnList); scmLocations.add(pipeline); actualLocations.put(containerId, pipeline); } @@ -166,13 +173,51 @@ public void testGetContainerLocationsWithScmFailures() throws IOException { assertEquals(runtimeException, actualRt.getCause()); } - ContainerWithPipeline createPipeline(long containerId) { + @Test + public void testDatanodeDetailsCacheRecordsStats() { + Cache datanodeDetailsCache = + ScmClient.createDatanodeDetailsCache(new OzoneConfiguration()); + + datanodeDetailsCache.getIfPresent(DatanodeID.randomID()); + + assertEquals(1, datanodeDetailsCache.stats().missCount()); + } + + @Test + public void testDatanodeDetailsCacheUpdatesIpAddressChange() { + Cache datanodeDetailsCache = + CacheBuilder.newBuilder().build(); + DatanodeDetails original = randomDatanode(); + String originalIp = original.getIpAddress(); + DatanodeDetails updated = DatanodeDetails.newBuilder() + .setDatanodeDetails(original) + .setIpAddress("updated-ip") + .build(); + + Pipeline originalPipeline = ScmClient.newPipelineWithDNCache( + createPipeline(1L, asList(original)).getPipeline(), + datanodeDetailsCache); + Pipeline refreshed = ScmClient.newPipelineWithDNCache( + createPipeline(2L, asList(updated)).getPipeline(), + datanodeDetailsCache); + + assertSame(original, originalPipeline.getNodes().get(0)); + assertEquals(originalIp, original.getIpAddress()); + assertEquals(originalIp, originalPipeline.getNodes().get(0).getIpAddress()); + DatanodeDetails refreshedNode = refreshed.getNodes().get(0); + assertSame(updated, refreshedNode); + assertEquals("updated-ip", refreshedNode.getIpAddress()); + assertSame(updated, datanodeDetailsCache.getIfPresent(original.getID())); + } + + ContainerWithPipeline createPipeline(long containerId, + List dnList) { ContainerInfo containerInfo = new ContainerInfo.Builder() .setContainerID(containerId) .build(); Pipeline pipeline = Pipeline.newBuilder() .setId(PipelineID.randomId()) - .setNodes(asList(randomDatanode(), randomDatanode())) + .setNodes(dnList) .setReplicationConfig(fromTypeAndFactor( ReplicationType.RATIS, ReplicationFactor.THREE)) .setState(Pipeline.PipelineState.OPEN)