Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions .ci/test_dev_on_prod_restart.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
#!/usr/bin/env bash
#
# End-to-end check of the "dev-on-prod" restart flow:
#   1. run a production client node until its block height has advanced,
#   2. stop it and copy its ledger once per dev validator,
#   3. start dev validators on top of the copied ledgers and wait for the
#      dev network to keep advancing.
#
# Must be run from the repository root: utils.sh is sourced through a path
# that is relative to the current working directory.

set -euo pipefail

# Diagnostics belong on stderr so they are never mistaken for regular output.
if (( BASH_VERSINFO[0] < 5 )); then
  echo "Error: This script requires bash version 5.0 or higher." >&2
  exit 1
fi

# Presumably provides the helpers used below that are not defined in this
# file (log, now, elapsed_since, is_integer, get_network_name, require_cmd,
# init_log_dir, download_and_build_latest_snarkos) - confirm against utils.sh.
# shellcheck source=SCRIPTDIR/utils.sh
. ./.ci/utils.sh

# Network whose REST routes are queried; the name is resolved by utils.sh.
network_id=0
NETWORK_NAME=$(get_network_name "$network_id")
# REST port polled for the latest block height.
REST_PORT=3030
# Number of dev validators to start.
NUM_DEV_NODES=4
# Number of blocks the setup node / dev network must advance by.
SETUP_ADVANCE_BLOCKS=5
DEV_ADVANCE_BLOCKS=10
# Timeouts handed to wait_for_height_advance (compared against the output of
# elapsed_since - presumably seconds).
SETUP_MAX_WAIT=20
DEV_MAX_WAIT=100

# PID of the production setup node; empty while it is not running.
BOOTSTRAP_PID=""
# PIDs of the running dev validators, indexed by dev node number.
declare -a DEV_PIDS=()
# Binary used for the setup node; replaced by the release binary later on.
SNARKOS_SETUP_BIN="snarkos"

# Prints the latest block height served by the REST API on the given port.
#
# Arguments: $1 - REST port to query
# Globals:   NETWORK_NAME (read), localhost (read, optional)
# Outputs:   the height when the response passes is_integer, otherwise an
#            empty line (curl failures are deliberately tolerated so callers
#            can poll while the node is still starting)
function get_height() {
  local port="$1"
  # `localhost` is not defined in this file; fall back to the literal host
  # name so an unset variable does not abort the lookup under `set -u`,
  # while still honouring the variable if a sourced file defines it.
  local host="${localhost:-localhost}"
  local response
  response=$(curl -s --max-time 2 "http://${host}:${port}/v2/${NETWORK_NAME}/block/height/latest" || true)
  if is_integer "$response"; then
    echo "$response"
  else
    echo ""
  fi
}

# Polls the REST endpoint on a port until it reports a block height.
#
# Arguments: $1 - REST port to poll
#            $2 - timeout, compared against the output of elapsed_since
# Returns:   0 as soon as get_height yields a non-empty value,
#            1 once the timeout has elapsed without one
function wait_for_node_ready() {
  local port="$1"
  local timeout="$2"
  local began observed
  began=$(now)

  while (( $(elapsed_since "$began") < timeout )); do
    observed=$(get_height "$port")
    if [[ -z "$observed" ]]; then
      # Endpoint not answering yet: back off briefly and poll again.
      log "Sleeping for 2 seconds before retrying to get height"
      sleep 2
      continue
    fi
    log "Node on port ${port} is ready at height ${observed}"
    return 0
  done

  log "Timed out waiting for node on port ${port} to become ready"
  return 1
}

# Waits until the node behind a REST port has advanced by a number of blocks.
#
# Arguments: $1 - REST port to poll
#            $2 - number of blocks the height must advance by
#            $3 - timeout, compared against the output of elapsed_since; it
#                 is applied to the readiness wait and, separately, to the
#                 advance wait
#            $4 - label prefixed to the log messages
# Returns:   0 once the height is >= (initial height + $2),
#            1 if the node never becomes ready, the initial height cannot be
#              read, or the timeout elapses first
function wait_for_height_advance() {
  local port="$1"
  local advance_by="$2"
  local timeout="$3"
  local label="$4"

  # Propagate a readiness failure explicitly: `set -e` is suppressed when
  # this function is called from a conditional context, in which case the
  # failure would otherwise be silently ignored.
  wait_for_node_ready "$port" "$timeout" || return 1

  local start_height
  start_height=$(get_height "$port")
  if [[ -z "$start_height" ]]; then
    log "${label}: failed to read initial block height from port ${port}"
    return 1
  fi

  local target_height=$((start_height + advance_by))
  local start_time
  start_time=$(now)
  local last_log_time=0
  local current_height elapsed

  log "${label}: waiting for height to advance by ${advance_by} blocks (${start_height} -> ${target_height})"
  while (( $(elapsed_since "$start_time") < timeout )); do
    current_height=$(get_height "$port")
    if [[ -n "$current_height" ]] && (( current_height >= target_height )); then
      log "${label}: reached target height ${current_height} (>= ${target_height})"
      return 0
    fi

    # Emit a progress line at most once every 15 units of elapsed time.
    elapsed=$(elapsed_since "$start_time")
    if (( elapsed - last_log_time >= 15 )); then
      if [[ -n "$current_height" ]]; then
        log "${label}: current height ${current_height}, target ${target_height}"
      else
        log "${label}: waiting for REST endpoint on port ${port}"
      fi
      last_log_time=$elapsed
    fi
    log "Sleeping for 2 seconds before retrying to get height"
    sleep 2
  done

  local final_height
  final_height=$(get_height "$port")
  log "${label}: timed out waiting for height advance (final height: ${final_height:-unavailable}, target: ${target_height})"
  return 1
}

# Waits for a process to disappear.
#
# Arguments: $1 - PID to watch
#            $2 - timeout, compared against the output of elapsed_since
# Returns:   0 once `kill -0` can no longer signal the PID,
#            1 if the timeout elapses while it can still be signalled
function wait_for_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local began
  began=$(now)

  while (( $(elapsed_since "$began") < timeout )); do
    # Signal 0 delivers nothing; it only reports whether the PID can be signalled.
    kill -0 "$pid" 2>/dev/null || return 0
    sleep 1
  done

  return 1
}

# Stops a process, escalating SIGINT -> SIGTERM -> SIGKILL.
#
# Arguments: $1 - PID to stop (may be empty, in which case nothing happens)
#            $2 - label used in the log messages
# Returns:   0 in every case; signalling errors are deliberately ignored
#            because the process may exit between the check and the signal
function graceful_stop_pid() {
  local pid="$1"
  local label="$2"

  # Nothing to do without a PID or when the process is already gone.
  if [[ -z "$pid" ]] || ! kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  log "Stopping ${label} (pid=${pid}) with SIGINT"
  kill -INT "$pid" 2>/dev/null || true
  wait_for_pid_exit "$pid" 60 && return 0

  log "${label} did not exit after SIGINT; sending SIGTERM"
  kill -TERM "$pid" 2>/dev/null || true
  wait_for_pid_exit "$pid" 20 && return 0

  log "${label} did not exit after SIGTERM; sending SIGKILL"
  kill -KILL "$pid" 2>/dev/null || true
  # Collect the killed child if it is one of ours; ignore the status.
  wait "$pid" 2>/dev/null || true
}

# Stops every dev node recorded in DEV_PIDS and then empties the array.
#
# Globals: DEV_PIDS (read, then reset to an empty array)
function graceful_stop_all_dev_nodes() {
  # Keep the loop index local so callers' variables are not clobbered.
  local i
  for i in "${!DEV_PIDS[@]}"; do
    graceful_stop_pid "${DEV_PIDS[$i]}" "dev-node-${i}"
  done
  DEV_PIDS=()
}

# Exit handler: stops whatever nodes are still running.
# Globals: BOOTSTRAP_PID (read)
cleanup() {
  # Dev validators first, then the production setup node.
  graceful_stop_all_dev_nodes
  graceful_stop_pid "${BOOTSTRAP_PID}" "setup-node"
}

# Launches the production setup node as a background client.
#
# Globals: SNARKOS_SETUP_BIN (read), BOOTSTRAP_PID (written)
# Outputs: the node's stdout and stderr are captured in
#          dev_logs/setup-client.txt (relative to the working directory)
function start_setup_node() {
  local log_dir="dev_logs"
  local log_file="${log_dir}/setup-client.txt"

  mkdir -p "$log_dir"
  log "Starting production setup node: ${SNARKOS_SETUP_BIN} start --client --nodisplay"

  "$SNARKOS_SETUP_BIN" start --client --nodisplay > "$log_file" 2>&1 &
  # Remember the PID so the node can be stopped later.
  BOOTSTRAP_PID=$!

  log "Started setup node (pid=${BOOTSTRAP_PID})"
}

# Replaces the local dev ledgers with fresh copies of the setup node's ledger.
#
# One copy named ledger-<i> is created in the working directory for every
# dev node, so the number of copies follows NUM_DEV_NODES instead of being
# fixed (falls back to 4 when the variable is unset).
#
# Globals: HOME (read), NUM_DEV_NODES (read)
# Exits:   with status 1 when ${HOME}/.aleo/storage/ledger-0 is missing
function copy_setup_ledger() {
  local source="${HOME}/.aleo/storage/ledger-0"
  local node_count="${NUM_DEV_NODES:-4}"
  local i

  if [[ ! -d "$source" ]]; then
    log "Missing source ledger at ${source}"
    exit 1
  fi

  log "Copying setup ledger into local ledgers"
  for (( i = 0; i < node_count; i++ )); do
    # Drop any stale ledger first so the copy does not nest inside it.
    rm -rf -- "ledger-${i}"
    cp -r -- "$source" "ledger-${i}"
  done
}

# Starts NUM_DEV_NODES dev validators in the background on top of the local
# ledger copies (ledger-<i>), one second apart.
#
# The binary is taken from SNARKOS_DEV_BIN and defaults to the `snarkos`
# found on PATH, rather than a machine-specific build directory, so the
# script also works outside the original author's checkout.
#
# Globals: NUM_DEV_NODES (read), SNARKOS_DEV_BIN (read, optional),
#          DEV_PIDS (reset, then filled with one PID per dev node)
# Outputs: each node writes its log to dev_logs/val-<i>.txt
function start_dev_nodes() {
  local dev_bin="${SNARKOS_DEV_BIN:-snarkos}"
  local i

  mkdir -p dev_logs
  DEV_PIDS=()

  for (( i = 0; i < NUM_DEV_NODES; i++ )); do
    log "Starting dev node ${i}"
    DEV_COMMITTEE_NUM_VALIDATORS="${NUM_DEV_NODES}" "$dev_bin" start --nodisplay --validator \
      --ledger-storage "ledger-${i}" --node-data-storage "node-data-${i}" --dev "${i}" \
      --no-dev-txs --nocdn --dev-num-validators "${NUM_DEV_NODES}" --verbosity 2 \
      --allow-external-peers --logfile "dev_logs/val-${i}.txt" --dev-on-prod &
    DEV_PIDS[i]=$!
    # Stagger the launches by one second.
    sleep 1
  done
}

# Without errtrace (-E) the ERR trap is not inherited by shell functions, so
# a failure inside one of the helpers above would abort the script without
# the diagnostic line below.
set -E
# Always stop any node that is still running, whatever the exit path.
trap cleanup EXIT
trap 'log "Error at line $LINENO while running: $BASH_COMMAND"' ERR

init_log_dir
require_cmd snarkos
require_cmd curl
require_cmd cargo
require_cmd tar

log "Downloading and building latest snarkOS release binary for setup node..."
download_and_build_latest_snarkos
# Fail with a clear message, rather than a bare "unbound variable" error,
# if the helper did not export the path of the release binary.
SNARKOS_SETUP_BIN="${SNARKOS_RELEASE_BIN:?download_and_build_latest_snarkos did not set SNARKOS_RELEASE_BIN}"
log "Using setup binary: ${SNARKOS_SETUP_BIN}"

log "Step 1: Start production node and wait for +${SETUP_ADVANCE_BLOCKS} blocks"
start_setup_node
wait_for_height_advance "$REST_PORT" "$SETUP_ADVANCE_BLOCKS" "$SETUP_MAX_WAIT" "setup-network"

log "Step 2: Gracefully stop production node"
graceful_stop_pid "$BOOTSTRAP_PID" "setup-node"
# Cleared so the EXIT handler does not try to stop the node a second time.
BOOTSTRAP_PID=""

log "Step 3: Copy production ledger and start ${NUM_DEV_NODES} dev nodes"
copy_setup_ledger
start_dev_nodes

log "Step 4: Wait until dev network advances by +${DEV_ADVANCE_BLOCKS} blocks"
wait_for_height_advance "$REST_PORT" "$DEV_ADVANCE_BLOCKS" "$DEV_MAX_WAIT" "dev-network-first-run"

log "Step 5: Gracefully stop all dev nodes"
graceful_stop_all_dev_nodes

# TODO: encountering the following warnings when stopping and starting all dev nodes:
# - WARN "Cannot propose a batch for round 32 - the latest proposal cache round is 34"
# - WARN "Failed to load stored certificate 1936933304208994.. from proposal cache — Previous certificates for a batch in round 32 did not reach quorum threshold (gc = 0)"
# - WARN "Failed to load stored certificate 8429685712854720.. from proposal cache — Failed to fetch missing transmissions and previous certificates for round 33 from '127.0.0.1:0 — Unable to fetch batch certificate 1936933304208994661214537875451736804168699382028485722463302790461889223166field (failed to send request)"
# log "Step 6: Restart all dev nodes and wait for +${DEV_ADVANCE_BLOCKS} blocks"
# start_dev_nodes
# wait_for_height_advance "$REST_PORT" "$DEV_ADVANCE_BLOCKS" "$DEV_MAX_WAIT" "dev-network-second-run"

log "SUCCESS: Completed dev-on-prod restart flow"
32 changes: 28 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,23 @@ jobs:
command: |
./.ci/test_devnet.sh

dev-on-prod-restart-test:
executor: rust-docker
resource_class: << pipeline.parameters.xlarge >>
steps:
- checkout
- setup_environment:
cache_key: v4.2.0-rust-1.88.0-dev-on-prod-restart-test-cache
- install_snarkos
- run:
name: "Run dev-on-prod restart test"
timeout: 40m
no_output_timeout: 15m
command: |
./.ci/test_dev_on_prod_restart.sh
- clear_environment:
cache_key: v4.2.0-rust-1.88.0-dev-on-prod-restart-test-cache

chaotic-minority-reset-test:
executor: ubuntu-vm
resource_class: << pipeline.parameters.twoxlarge >>
Expand Down Expand Up @@ -791,8 +808,18 @@ workflows:
jobs:
- devnet-test

chaotic-devnet-workflow:
merge-devnet-workflow:
jobs:
- dev-on-prod-restart-test:
filters:
branches:
only:
- test_fixed_dev_committee
- canary
- testnet
- mainnet
- staging

- chaotic-minority-reset-test:
filters:
branches:
Expand All @@ -812,9 +839,6 @@ workflows:
- testnet
- mainnet
- staging

upgrade-workflow:
jobs:
- upgrade-test:
filters:
branches:
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ serial = [
]
test_targets = [ "snarkos-cli/test_targets" ]
test_consensus_heights = [ "snarkos-cli/test_consensus_heights" ]
test_network = [ "snarkos-cli/test_network" ]
test_network = [ "snarkos-cli/test_network", "snarkos-node/test_network" ]
tokio_console = [ "snarkos-cli/tokio_console" ]

[dependencies.clap]
Expand Down
8 changes: 6 additions & 2 deletions cli/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,17 @@ pub struct Start {
pub dev_num_clients: Option<u16>,

/// If development mode is enabled, specify whether node 0 should generate traffic to drive the network.
#[clap(long, group = "dev_flag")]
#[clap(long, group = "dev_flags")]
pub no_dev_txs: bool,

/// If development mode is enabled, specify the custom bonded balances as a JSON object.
#[clap(long, group = "dev_flags")]
pub dev_bonded_balances: Option<BondedBalances>,

/// If development mode is enabled, specify whether to run the node on a production ledger.
#[clap(long, group = "dev_flags", default_value_t = false)]
pub dev_on_prod: bool,

/// If the flag is set, the node will attempt to automatically migrate the node data to the new format.
#[clap(long)]
pub auto_migrate_node_data: bool,
Expand Down Expand Up @@ -525,7 +529,7 @@ impl Start {
/// Returns an alternative genesis block if the node is in development mode.
/// Otherwise, returns the actual genesis block.
fn parse_genesis<N: Network>(&self) -> Result<Block<N>> {
if self.dev.is_some() {
if self.dev.is_some() && !self.dev_on_prod {
// Determine the number of genesis committee members.
let num_committee_members = self.dev_num_validators;
ensure!(
Expand Down
4 changes: 1 addition & 3 deletions cli/src/helpers/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ use snarkvm::{console::network::Network, prelude::PrivateKey};
use anyhow::Result;
use rand::SeedableRng;
use rand_chacha::ChaChaRng;
pub use snarkos_utilities::DEVELOPMENT_MODE_RNG_SEED;
use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4};

/// The development mode RNG seed.
pub const DEVELOPMENT_MODE_RNG_SEED: u64 = 1234567890u64;

/// The development mode number of genesis committee members.
pub const DEVELOPMENT_MODE_NUM_GENESIS_COMMITTEE_MEMBERS: u16 = 4;

Expand Down
1 change: 1 addition & 0 deletions node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ serial = [
"snarkos-node-bft/serial"
]
test = []
test_network = [ "snarkos-node-bft/test_network", "snarkos-node-consensus/test_network" ]

[dependencies.aleo-std]
workspace = true
Expand Down
6 changes: 6 additions & 0 deletions node/bft/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ test = [
"snarkos-node-bft-ledger-service/test",
"snarkos-node-bft-storage-service/test"
]
test_network = [
"snarkos-node-bft-ledger-service/test_network",
]
serial = [
"snarkos-node-metrics/serial",
"snarkos-node-bft-ledger-service/serial"
Expand Down Expand Up @@ -95,6 +98,9 @@ workspace = true
[dependencies.rand]
workspace = true

[dependencies.rand_chacha]
workspace = true

[dependencies.rayon]
workspace = true

Expand Down
Loading