diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java index 31b890fdbd3..7dd8157ead6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java @@ -707,10 +707,15 @@ public synchronized void incrementBlockBytes(long delta) { public synchronized void decDeletion(long deletedBytes, long processedBytes, long deletedBlockCount, long processedBlockCount) { + + // After subtraction if blockBytes is 0, let it be. Only if it becomes negative, set the size to 1 byte. blockBytes -= deletedBytes; - blockCount -= deletedBlockCount; - blockPendingDeletion -= processedBlockCount; - blockPendingDeletionBytes -= processedBytes; + if (blockBytes < 0) { + blockBytes = 1L; + } + blockCount = Math.max(0L, blockCount - deletedBlockCount); + blockPendingDeletion = Math.max(0L, blockPendingDeletion - processedBlockCount); + blockPendingDeletionBytes = Math.max(0L, blockPendingDeletionBytes - processedBytes); } public synchronized void updateBlocks(long bytes, long count) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java index 968e331103e..1ea92309aa2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.createContainer; import static org.apache.ozone.test.GenericTestUtils.waitFor; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import com.google.common.collect.ImmutableList; import java.io.IOException; @@ -53,8 +54,11 @@ import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.GenericTestUtils.LogCapturer; @@ -65,6 +69,7 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.slf4j.event.Level; /** @@ -155,6 +160,54 @@ void targetPullsFromWrongService() throws Exception { ReplicationSupervisor::getReplicationFailureCount); } + /** + * Replication must succeed even when the source container's persisted + * {@code CONTAINER_BYTES_USED} RocksDB counter has drifted negative. + */ + @ParameterizedTest + @ValueSource(longs = {0L, 1L, -1_234_567_890L}) + void pushSucceedsWhenSourceBytesUsedIsNegative(long containerSize) throws Exception { + DatanodeDetails source = cluster.getHddsDatanodes().get(0) + .getDatanodeDetails(); + DatanodeDetails target = selectOtherNode(source); + + long containerID = createOverAllocatedContainer(source, 2L * 1024L * 1024L); + + poisonBytesUsed(source, containerID, containerSize); + + ReplicateContainerCommand cmd = + ReplicateContainerCommand.toTarget(containerID, target); + + queueAndWaitForCompletion(cmd, source, + ReplicationSupervisor::getReplicationSuccessCount); + + // Target must end up hosting the container. + Container imported = cluster.getHddsDatanode(target) + .getDatanodeStateMachine() + .getContainer() + .getContainerSet() + .getContainer(containerID); + assertNotNull(imported, "target should import the container despite a negative bytesUsed on source"); + } + + private void poisonBytesUsed(DatanodeDetails dn, long containerID, long poisonValue) throws IOException { + HddsDatanodeService dnService = cluster.getHddsDatanode(dn); + Container container = dnService.getDatanodeStateMachine().getContainer() + .getContainerSet().getContainer(containerID); + KeyValueContainerData data = + (KeyValueContainerData) container.getContainerData(); + + try (DBHandle db = BlockUtils.getDB(data, dnService.getConf())) { + db.getStore().getMetadataTable() + .put(data.getBytesUsedKey(), poisonValue); + } + // Keep the in-memory Statistics counter consistent with the on-disk + // poisoned value. The import failure is driven by the on-disk value (what + // the target reads), but this prevents any subsequent close/flush path on + // the source from silently correcting the poison before packing. + data.getStatistics().setBlockBytesForTesting(poisonValue); + } + /** * Replication fails because source tries to push a non-existent container. */