Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ commands:
name: "Run devnet test"
timeout: 20m # Allow 20 minutes total
command: |
./.circleci/db_backup_ci.sh # run the db checkpoint test script first, and clean the dev ledgers afterwards
snarkos clean --dev 0
snarkos clean --dev 1
snarkos clean --dev 2
snarkos clean --dev 3
./.circleci/devnet_ci.sh << parameters.validators >> << parameters.clients >> << parameters.network_id >> << parameters.min_height >>
- clear_environment:
cache_key: << parameters.cache_key >>
Expand Down
166 changes: 166 additions & 0 deletions .circleci/db_backup_ci.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/bin/bash

# Network parameters
# Number of validator nodes to launch; every per-node loop in this script runs 0..total_validators-1.
total_validators=4
# Passed to `snarkos --network`.
network_id=0
# Network name used as the first path segment of the REST URLs.
network_name="mainnet"

# Stopping conditions
# Height that all nodes must reach before the next checkpoint is created (raised by 2 after each checkpoint).
checkpoint_height=3
# Height that all nodes must reach before they are shut down and restarted from the latest checkpoint.
rollback_height=10
# Number of checkpoints created so far; also used as the numeric suffix of the checkpoint path.
num_checkpoints=0
# Number of additional rollback cycles to run after the first one; the test succeeds once a
# rollback completes while this counter is 0.
remaining_checkpoints=2

# Use fixed JWT values in order to be able to create checkpoints
# The secret and timestamp are handed to every node via --jwt-secret/--jwt-timestamp; the jwt[]
# array that follows holds one pre-generated token per node index, sent as the Bearer token
# when requesting a checkpoint from that node.
jwt_secret="ZGJjaGVja3BvaW50dGVzdA=="
jwt_ts=1749116345
jwt[0]="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhbGVvMXJoZ2R1NzdoZ3lxZDN4amo4dWN1M2pqOXIya3J3ejZtbnp5ZDgwZ25jcjVmeGN3bGg1cnN2enA5cHgiLCJpYXQiOjE3NDkxMTYzNDUsImV4cCI6MjA2NDQ3NjM0NX0.qm2idfIm4ZTFOsyT19lH9pcWzzAtP5mbymkN4oL6_sc"
jwt[1]="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhbGVvMXMzd3M1dHJhODdmanljbmpyd3NqY3JudzJxeHI4amZxcWR1Z25mMHh6cXF3MjlxOW01cHFlbTJ1NHQiLCJpYXQiOjE3NDkxMTYzNDUsImV4cCI6MjA2NDQ3NjM0NX0.4efs4qWJuG0Lm2CxrLMIKrrbJiGD-XNqHlk_AUaXOBo"
jwt[2]="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhbGVvMWFzaHl1OTZ0andlNjN1MGd0bm52OHo1bGhhcGR1NGw1cGpzbDJraGE3ZnY3aHZ6MmVxeHM1ZHowcmciLCJpYXQiOjE3NDkxMTYzNDUsImV4cCI6MjA2NDQ3NjM0NX0.zxO1ajmQ0Wqr1gg4NuRzH4i_hiUBt7_fP9WP3KHbp4c"
jwt[3]="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhbGVvMTJ1eDNnZGF1Y2swdjYwd2VzdGdjcHFqN3Y4cnJjcjN2MzQ2ZTRqdHEwNHE3a2t0MjJjenNoODA4djIiLCJpYXQiOjE3NDkxMTYzNDUsImV4cCI6MjA2NDQ3NjM0NX0.bJZ-fcrJwaI5YdPXDQ1nySV-jmxeABQCSvL1Ag9CSpo"

# PIDs of the background validator processes, indexed by validator number
declare -a PIDS

# Launch each validator as a background job and record its PID
validator_index=0
while (( validator_index < total_validators )); do
    snarkos start --nodisplay \
        --network $network_id \
        --dev $validator_index \
        --dev-num-validators $total_validators \
        --validator \
        --jwt-secret $jwt_secret \
        --jwt-timestamp $jwt_ts &
    PIDS[validator_index]=$!
    echo "Started validator $validator_index with PID ${PIDS[validator_index]}"
    # Stagger the launches by one second to avoid hitting rate limits
    sleep 1
    validator_index=$((validator_index + 1))
done

# Checks whether every validator has reached a given block height.
#
# Arguments:
#   $1 - the minimum block height that every node must report
# Globals read:
#   total_validators, network_name
# Returns:
#   0 if all nodes report a height >= $1; 1 as soon as one node is unreachable,
#   answers with something that is not a plain number, or is below the target.
check_heights() {
    local target_height=$1
    local node_index port height

    echo "Checking block heights on all nodes..."
    for ((node_index = 0; node_index < total_validators; node_index++)); do
        # Node N is queried on port 3030 + N
        port=$((3030 + node_index))
        # Bound the request so that a hung endpoint cannot stall the caller's polling loop;
        # any curl failure is treated as height 0.
        height=$(curl -s --max-time 5 "http://127.0.0.1:$port/$network_name/block/height/latest") || height="0"

        # Only a purely numeric reply at or above the target counts as "reached"
        if ! [[ "$height" =~ ^[0-9]+$ ]] || [ "$height" -lt "$target_height" ]; then
            return 1
        fi
    done

    echo "All nodes reached the height of $target_height"
    return 0
}

# Asks every validator to create a database checkpoint through its REST API.
#
# Arguments:
#   $1 - checkpoint sequence number; node N is asked to write to /tmp/checkpoint_<N>_<$1>
# Globals read:
#   total_validators, network_name, jwt
# Returns:
#   0 if every request succeeded; 1 as soon as one request fails.
create_checkpoints() {
    local checkpoint_id=$1
    local node_index port suffix

    for ((node_index = 0; node_index < total_validators; node_index++)); do
        # Node N is addressed on port 3030 + N and authenticated with its own token
        port=$((3030 + node_index))
        suffix="${node_index}_${checkpoint_id}"
        # --fail makes curl exit non-zero on HTTP error statuses (e.g. a rejected token or a
        # server-side backup error); without it only transport errors would be noticed.
        if ! curl -s --fail -o /dev/null -X "POST" -H "Authorization: Bearer ${jwt[node_index]}" "http://127.0.0.1:$port/$network_name/db_backup?path=/tmp/checkpoint_$suffix"; then
            echo "Node $node_index failed to create /tmp/checkpoint_$suffix"
            return 1
        fi
    done

    echo "All nodes created a checkpoint"
    return 0
}

# Wait for 15 seconds to let the network start up
# (a fixed grace period before the first height poll; nodes that are not reachable yet are
# simply treated as height 0 by check_heights).
echo "Waiting 15 seconds for network to start up..."
sleep 15
Comment thread
vicsn marked this conversation as resolved.

# Forcefully terminates every validator process recorded in PIDS.
kill_validators() {
    local pid
    for pid in "${PIDS[@]}"; do
        kill -9 "$pid" 2>/dev/null || true
    done
}

# Reports a test failure, terminates all validators and aborts the script with status 1.
fail_test() {
    echo "❌ Test failed!"
    kill_validators
    exit 1
}

# Check heights periodically with a timeout. Each cycle:
#   1. waits until all nodes reach $checkpoint_height and creates a checkpoint,
#   2. waits until all nodes reach $rollback_height,
#   3. stops the validators, cleans the ledger they were running on and restarts them
#      from the newest checkpoint.
# The timeout only counts the 3-second polling sleeps, not the time spent inside a cycle.
total_wait=0
checkpoint_created=false
while [ $total_wait -lt 300 ]; do # 5 minutes max
    # Apply short-circuiting: the checkpoint height is only polled while no checkpoint is pending
    if [[ $checkpoint_created = true ]] || check_heights "$checkpoint_height"; then
        if [[ $checkpoint_created = false ]]; then
            # Create checkpoints at the specified height; restarting from a checkpoint that was
            # never written would make the rest of the test meaningless, so fail right away.
            if ! create_checkpoints $num_checkpoints; then
                echo "Could not create checkpoint $num_checkpoints"
                fail_test
            fi
            checkpoint_created=true
            checkpoint_height=$((checkpoint_height+2))
            num_checkpoints=$((num_checkpoints+1))

            echo "num_checkpoints: $num_checkpoints"
            sleep 2
        fi

        # Wait until the specified rollback height is reached
        if check_heights "$rollback_height"; then
            echo "All nodes reached rollback height."

            checkpoint_created=false

            # Gracefully shut down the validators
            for pid in "${PIDS[@]}"; do
                kill -15 $pid 2>/dev/null || true
            done
            # Wait until the shutdown concludes.
            sleep 5

            for ((validator_index = 0; validator_index < $total_validators; validator_index++)); do
                # Clean the ledger the validator was running on: the default dev ledger in the
                # first cycle, the previously used checkpoint in later cycles
                if (( num_checkpoints == 1 )); then
                    snarkos clean --network $network_id --dev $validator_index
                else
                    suffix="${validator_index}_$((num_checkpoints-2))"
                    snarkos clean --network $network_id --dev $validator_index --path=/tmp/checkpoint_$suffix
                fi
                # Wait until the cleanup concludes
                sleep 1
                # Restart using the newest checkpoint
                suffix="${validator_index}_$((num_checkpoints-1))"
                snarkos start --nodisplay --network $network_id --dev $validator_index --dev-num-validators $total_validators --validator --jwt-secret $jwt_secret --jwt-timestamp $jwt_ts --storage /tmp/checkpoint_$suffix &
                PIDS[$validator_index]=$!
                echo "Restarted validator $validator_index with PID ${PIDS[$validator_index]}"
                # Add 1-second delay between starting nodes to avoid hitting rate limits
                sleep 1

                port=$((3030 + validator_index))
                height=$(curl -s "http://127.0.0.1:$port/$network_name/block/height/latest" || echo "0")
                echo "Node height after restart: $height"

                # Ensure that the height is below the rollback height
                # NOTE(review): as written this only triggers when
                # rollback_height <= height < checkpoint_height, which cannot happen while
                # checkpoint_height is below rollback_height - confirm whether
                # `(( height >= rollback_height ))` alone was intended.
                if [[ "$height" =~ ^[0-9]+$ ]] && (( height >= rollback_height )) && (( height < checkpoint_height )); then
                    # Do not leave the restarted validators running behind the failed test
                    fail_test
                fi
            done

            if (( remaining_checkpoints == 0 )); then
                echo "SUCCESS!"

                # Cleanup: kill all processes
                kill_validators

                exit 0
            fi

            remaining_checkpoints=$((remaining_checkpoints-1))

        fi
    fi

    # Continue waiting
    sleep 3
    total_wait=$((total_wait + 3))
    echo "Waited $total_wait seconds so far..."
done

# The main loop has expired by now
fail_test
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ default-features = false
[workspace.dependencies.snarkvm]
#path = "../snarkVM"
git = "https://github.com/ProvableHQ/snarkVM.git"
rev = "668b72b"
rev = "0e0e3c7"
#version = "=3.8.0"
default-features = false
#features = [ "circuit", "console", "rocks" ]
Expand Down
6 changes: 3 additions & 3 deletions cli/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -708,9 +708,9 @@ impl Start {

// Initialize the node.
match node_type {
NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, shutdown.clone()).await,
NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, shutdown.clone()).await,
NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, shutdown).await,
NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, shutdown.clone()).await,
NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, self.dev, shutdown.clone()).await,
NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, shutdown).await,
}
}

Expand Down
14 changes: 11 additions & 3 deletions node/bft/examples/simple_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ pub async fn start_bft(
// Initialize the BFT instance.
let block_sync = Arc::new(BlockSync::new(ledger.clone()));
let mut bft =
BFT::<CurrentNetwork>::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode)?;
BFT::<CurrentNetwork>::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?;
// Run the BFT instance.
bft.run(None, Some(consensus_sender), sender.clone(), receiver).await?;
// Retrieve the BFT's primary.
Expand Down Expand Up @@ -184,8 +184,16 @@ pub async fn start_primary(
let trusted_validators = trusted_validators(node_id, num_nodes, peers);
// Initialize the primary instance.
let block_sync = Arc::new(BlockSync::new(ledger.clone()));
let mut primary =
Primary::<CurrentNetwork>::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode)?;
let mut primary = Primary::<CurrentNetwork>::new(
account,
storage,
ledger,
block_sync,
ip,
&trusted_validators,
storage_mode,
None,
)?;
// Run the primary instance.
primary.run(None, None, sender.clone(), receiver).await?;
// Handle OS signals.
Expand Down
15 changes: 13 additions & 2 deletions node/bft/src/bft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ pub struct BFT<N: Network> {

impl<N: Network> BFT<N> {
/// Initializes a new instance of the BFT.
#[allow(clippy::too_many_arguments)]
pub fn new(
account: Account<N>,
storage: Storage<N>,
Expand All @@ -96,9 +97,10 @@ impl<N: Network> BFT<N> {
ip: Option<SocketAddr>,
trusted_validators: &[SocketAddr],
storage_mode: StorageMode,
dev: Option<u16>,
) -> Result<Self> {
Ok(Self {
primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode)?,
primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev)?,
dag: Default::default(),
leader_certificate: Default::default(),
leader_certificate_timer: Default::default(),
Expand Down Expand Up @@ -991,7 +993,16 @@ mod tests {
// Create the block synchronization logic.
let block_sync = Arc::new(BlockSync::new(ledger.clone()));
// Initialize the BFT.
BFT::new(account.clone(), storage.clone(), ledger.clone(), block_sync, None, &[], StorageMode::new_test(None))
BFT::new(
account.clone(),
storage.clone(),
ledger.clone(),
block_sync,
None,
&[],
StorageMode::new_test(None),
None,
)
}

#[test]
Expand Down
7 changes: 4 additions & 3 deletions node/bft/src/primary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ impl<N: Network> Primary<N> {
pub const MAX_TRANSMISSIONS_TOLERANCE: usize = BatchHeader::<N>::MAX_TRANSMISSIONS_PER_BATCH * 2;

/// Initializes a new primary instance.
#[allow(clippy::too_many_arguments)]
pub fn new(
account: Account<N>,
storage: Storage<N>,
Expand All @@ -127,10 +128,10 @@ impl<N: Network> Primary<N> {
ip: Option<SocketAddr>,
trusted_validators: &[SocketAddr],
storage_mode: StorageMode,
dev: Option<u16>,
) -> Result<Self> {
// Initialize the gateway.
let gateway =
Gateway::new(account, storage.clone(), ledger.clone(), ip, trusted_validators, storage_mode.dev())?;
let gateway = Gateway::new(account, storage.clone(), ledger.clone(), ip, trusted_validators, dev)?;
// Initialize the sync module.
let sync = Sync::new(gateway.clone(), storage.clone(), ledger.clone(), block_sync);

Expand Down Expand Up @@ -1996,7 +1997,7 @@ mod tests {
let account = accounts[account_index].1.clone();
let block_sync = Arc::new(BlockSync::new(ledger.clone()));
let mut primary =
Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None)).unwrap();
Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).unwrap();

// Construct a worker instance.
primary.workers = Arc::from([Worker::new(
Expand Down
2 changes: 2 additions & 0 deletions node/bft/tests/common/primary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ impl TestNetwork {
Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)),
&[],
StorageMode::new_test(None),
None,
)
.unwrap();
(bft.primary().clone(), Some(bft))
Expand All @@ -188,6 +189,7 @@ impl TestNetwork {
Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)),
&[],
StorageMode::new_test(None),
None,
)
.unwrap();
(primary, None)
Expand Down
5 changes: 4 additions & 1 deletion node/consensus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ pub struct Consensus<N: Network> {

impl<N: Network> Consensus<N> {
/// Initializes a new instance of consensus and spawn its background tasks.
#[allow(clippy::too_many_arguments)]
pub async fn new(
account: Account<N>,
ledger: Arc<dyn LedgerService<N>>,
Expand All @@ -131,6 +132,7 @@ impl<N: Network> Consensus<N> {
trusted_validators: &[SocketAddr],
storage_mode: StorageMode,
ping: Arc<Ping<N>>,
dev: Option<u16>,
) -> Result<Self> {
// Initialize the primary channels.
let (primary_sender, primary_receiver) = init_primary_channels::<N>();
Expand All @@ -139,7 +141,8 @@ impl<N: Network> Consensus<N> {
// Initialize the Narwhal storage.
let storage = NarwhalStorage::new(ledger.clone(), transmissions, BatchHeader::<N>::MAX_GC_ROUNDS as u64);
// Initialize the BFT.
let bft = BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode)?;
let bft =
BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev)?;
// Create a new instance of Consensus.
let mut _self = Self {
ledger,
Expand Down
1 change: 1 addition & 0 deletions node/rest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ impl<N: Network, C: ConsensusStorage<N>, R: Routing<N>> Rest<N, C, R> {
// All the endpoints before the call to `route_layer` are protected with JWT auth.
.route(&format!("/{network}/node/address"), get(Self::get_node_address))
.route(&format!("/{network}/program/{{id}}/mapping/{{name}}"), get(Self::get_mapping_values))
.route(&format!("/{network}/db_backup"), post(Self::db_backup))
.route_layer(middleware::from_fn(auth_middleware))

// Get ../consensus_version
Expand Down
15 changes: 15 additions & 0 deletions node/rest/src/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ pub(crate) struct BlockRange {
end: u32,
}

/// The query object for `db_backup`.
#[derive(Deserialize, Serialize)]
pub(crate) struct BackupPath {
/// The filesystem path handed to the ledger's `backup_database`, taken from the `path` query parameter.
path: std::path::PathBuf,
}

/// The query object for `get_mapping_value` and `get_mapping_values`.
#[derive(Copy, Clone, Deserialize, Serialize)]
pub(crate) struct Metadata {
Expand Down Expand Up @@ -583,6 +588,16 @@ impl<N: Network, C: ConsensusStorage<N>, R: Routing<N>> Rest<N, C, R> {
Ok(ErasedJson::pretty(solution_id))
}

// POST /{network}/db_backup?path=new_fs_path
pub(crate) async fn db_backup(
State(rest): State<Self>,
backup_path: Query<BackupPath>,
) -> Result<ErasedJson, RestError> {
rest.ledger.backup_database(&backup_path.path).map_err(RestError::from)?;

Ok(ErasedJson::pretty(()))
}

// GET /{network}/block/{blockHeight}/history/{mapping}
#[cfg(feature = "history")]
pub(crate) async fn get_history(
Expand Down
3 changes: 2 additions & 1 deletion node/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ impl<N: Network, C: ConsensusStorage<N>> Client<N, C> {
cdn: Option<String>,
storage_mode: StorageMode,
rotate_external_peers: bool,
dev: Option<u16>,
shutdown: Arc<AtomicBool>,
) -> Result<Self> {
// Initialize the signal handler.
Expand All @@ -167,7 +168,7 @@ impl<N: Network, C: ConsensusStorage<N>> Client<N, C> {
Self::MAXIMUM_NUMBER_OF_PEERS as u16,
rotate_external_peers,
allow_external_peers,
matches!(storage_mode, StorageMode::Development(_)),
dev.is_some(),
)
.await?;

Expand Down
Loading