Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
public static final String FAILED_DB_VOLUMES_TOLERATED_KEY = "hdds.datanode.failed.db.volumes.tolerated";
public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap";
public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout";
public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap";

// Minimum free space that should be left on the volume.
// Ex: If volume has 1000GB and minFreeSpace is configured as 10GB,
Expand Down Expand Up @@ -99,6 +100,8 @@ public class DatanodeConfiguration extends ReconfigurableConfig {

static final Duration DISK_CHECK_TIMEOUT_DEFAULT = Duration.ofMinutes(10);

static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1);

static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true;
static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024;
static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64;
Expand Down Expand Up @@ -404,6 +407,17 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
)
private Duration diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;

// Wait time between retry attempts of a disk check; the DB health check sleeps for this
// long between failed attempts to open RocksDB. The annotation default ("1m") mirrors
// DISK_CHECK_RETRY_GAP_DEFAULT. validate() resets this field to that default when the
// configured value is negative or larger than diskCheckTimeout.
@Config(key = DISK_CHECK_RETRY_GAP_KEY,
defaultValue = "1m",
Comment thread
ptlrs marked this conversation as resolved.
Outdated
type = ConfigType.TIME,
tags = {DATANODE},
description = "Time to wait between retries of disk checks."
+ " To ignore transient issues, the RocksDb instance on a disk is validated multiple times before"
+ " declaring failure. This configuration defines the time to wait between the retry attempts."
+ " Unit could be defined with postfix (ns,ms,s,m,h,d)."
)
private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;

@Config(key = "hdds.datanode.chunk.data.validation.check",
defaultValue = "false",
type = ConfigType.BOOLEAN,
Expand Down Expand Up @@ -688,6 +702,19 @@ public void validate() {
diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;
}

if (diskCheckRetryGap.isNegative()) {
LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}",
DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT);
diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;
}

if (diskCheckRetryGap.compareTo(diskCheckTimeout) > 0) {
Comment thread
ptlrs marked this conversation as resolved.
Outdated
LOG.warn("{} was set to {}. It must be less than {} which is {}. Defaulting to {}",
DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckTimeout,
DISK_CHECK_RETRY_GAP_DEFAULT);
diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;
}

if (blockDeleteCommandWorkerInterval.isNegative()) {
LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL +
" must be greater than zero and was set to {}. Defaulting to {}",
Expand Down Expand Up @@ -903,6 +930,10 @@ public Duration getDiskCheckTimeout() {
return diskCheckTimeout;
}

/**
 * Returns the wait time applied between retry attempts of a disk check.
 *
 * @return the current value of the {@code diskCheckRetryGap} field
 */
public Duration getDiskCheckRetryGap() {
  return this.diskCheckRetryGap;
}

/**
 * Replaces the disk check timeout.
 *
 * @param duration value stored as-is in the {@code diskCheckTimeout} field;
 *                 no validation is performed by this setter
 */
public void setDiskCheckTimeout(Duration duration) {
  this.diskCheckTimeout = duration;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import jakarta.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -326,17 +327,31 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException
return VolumeCheckResult.HEALTHY;
}

// We attempt to open RocksDb twice to ignore any transient errors
// and to confirm that we actually cannot open RocksDb in readonly mode.
final boolean isVolumeTestResultHealthy = true;
try (ManagedOptions managedOptions = new ManagedOptions();
ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) {
volumeTestResultQueue.add(isVolumeTestResultHealthy);
} catch (Exception e) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Check of database for volume " + this + " interrupted.");
final int maxAttempts = 2;
Comment thread
ptlrs marked this conversation as resolved.
Outdated
final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap();
for (int attempt = 0; attempt < maxAttempts; attempt++) {
try (ManagedOptions managedOptions = new ManagedOptions();
Comment thread
ptlrs marked this conversation as resolved.
Outdated
ManagedRocksDB ignored =
ManagedRocksDB.openAsSecondary(managedOptions, dbFile.toString(), getTmpDir().getPath())) {
volumeTestResultQueue.add(isVolumeTestResultHealthy);
break;
} catch (Exception e) {
Comment thread
ptlrs marked this conversation as resolved.
Outdated
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Check of database for volume " + this + " interrupted.");
}

if (attempt == maxAttempts - 1) {
Comment thread
ptlrs marked this conversation as resolved.
Outdated
LOG.error("Could not open Volume DB located at {}", dbFile, e);
volumeTestResultQueue.add(!isVolumeTestResultHealthy);
volumeTestFailureCount.incrementAndGet();
} else {
LOG.warn("Could not open Volume DB located at {}", dbFile, e);
Thread.sleep(maxRetryGap.toMillis());
Comment thread
ptlrs marked this conversation as resolved.
Outdated
}
}
LOG.warn("Could not open Volume DB located at {}", dbFile, e);
volumeTestResultQueue.add(!isVolumeTestResultHealthy);
volumeTestFailureCount.incrementAndGet();
}

if (volumeTestResultQueue.size() > volumeTestCount
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ public static ManagedRocksDB openReadOnly(
);
}

/**
 * Opens the RocksDB located at {@code dbPath} as a secondary instance and wraps the
 * result in a {@code ManagedRocksDB}. This is a thin wrapper that delegates directly to
 * {@code RocksDB.openAsSecondary(options, dbPath, secondaryDbLogFilePath)}.
 *
 * NOTE(review): per the RocksDB documentation a secondary instance is expected to open the
 * database without taking over the primary's files and to keep its own info log under the
 * secondary path, unlike a plain read-only open -- confirm against the RocksDB version in use.
 *
 * @param options options passed through unchanged to RocksDB
 * @param dbPath path of the database to open
 * @param secondaryDbLogFilePath path handed to RocksDB as the secondary instance's own
 *                               location (presumably for its info log; verify)
 * @return a new {@code ManagedRocksDB} wrapping the opened secondary instance
 * @throws RocksDBException propagated from {@code RocksDB.openAsSecondary}
 */
public static ManagedRocksDB openAsSecondary(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ptlrs thanks for all the research you've done on secondary instances. It would be great to put a summary as a javadoc above this method about how they work and what we can expect vs. readonly instances.

final ManagedOptions options,
final String dbPath,
final String secondaryDbLogFilePath)
throws RocksDBException {
return new ManagedRocksDB(RocksDB.openAsSecondary(options, dbPath, secondaryDbLogFilePath));
Comment thread
ptlrs marked this conversation as resolved.
}

public static ManagedRocksDB open(
final DBOptions options, final String path,
final List<ColumnFamilyDescriptor> columnFamilyDescriptors,
Expand Down