From ab5304ebb4b2612bae47ccb56c2b7f1ea7246e56 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Thu, 4 Sep 2025 12:38:54 -0600 Subject: [PATCH 01/16] misc(node/bft): replace SyncSender with a callback --- node/bft/src/gateway.rs | 65 ++++++----- node/bft/src/helpers/channels.rs | 86 +-------------- node/bft/src/helpers/mod.rs | 57 ++++++++++ node/bft/src/primary.rs | 7 +- node/bft/src/sync/mod.rs | 178 ++++++++++--------------------- node/src/client/mod.rs | 6 +- 6 files changed, 160 insertions(+), 239 deletions(-) diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 98f1ed8dc4..d09555fbad 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -21,7 +21,7 @@ use crate::{ MEMORY_POOL_PORT, Worker, events::{EventCodec, PrimaryPing}, - helpers::{Cache, PrimarySender, Resolver, Storage, SyncSender, WorkerSender, assign_to_worker}, + helpers::{Cache, CallbackHandle, PrimarySender, Resolver, Storage, WorkerSender, assign_to_worker}, spawn_blocking, }; use snarkos_account::Account; @@ -42,7 +42,7 @@ use snarkos_node_bft_events::{ ValidatorsResponse, }; use snarkos_node_bft_ledger_service::LedgerService; -use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService}; +use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService, locators::BlockLocators}; use snarkos_node_tcp::{ Config, Connection, @@ -56,6 +56,7 @@ use snarkos_node_tcp::{ use snarkvm::{ console::prelude::*, ledger::{ + Block, committee::Committee, narwhal::{BatchHeader, Data}, }, @@ -117,6 +118,22 @@ pub trait Transport: Send + Sync { fn broadcast(&self, event: Event); } +pub trait SyncCallback: Send + Sync { + /// We received a block response and can (possibly) advance synchronization. + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()>; + + /// We received new peer locators during a Ping. 
+ fn update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()>; + + /// A peer disconnected. + fn remove_peer(&self, peer_ip: SocketAddr); + + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest); + + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse); +} + /// The gateway maintains connections to other validators. /// For connections with clients and provers, the Router logic is used. #[derive(Clone)] @@ -149,8 +166,8 @@ pub struct Gateway { primary_sender: Arc>>, /// The worker senders. worker_senders: Arc>>>, - /// The sync sender. - sync_sender: Arc>>, + /// The callback for sync messages. + sync_callback: Arc>>>, /// The spawned handles. handles: Arc>>>, /// The development mode. @@ -191,7 +208,7 @@ impl Gateway { validator_telemetry: Default::default(), primary_sender: Default::default(), worker_senders: Default::default(), - sync_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), dev, }) @@ -202,7 +219,7 @@ impl Gateway { &self, primary_sender: PrimarySender, worker_senders: IndexMap>, - sync_sender: Option>, + sync_callback: Option>>, ) { debug!("Starting the gateway for the memory pool..."); @@ -212,9 +229,8 @@ impl Gateway { // Set the worker senders. self.worker_senders.set(worker_senders).expect("The worker senders are already set"); - // If the sync sender was provided, set the sync sender. - if let Some(sync_sender) = sync_sender { - self.sync_sender.set(sync_sender).expect("Sync sender already set in gateway"); + if let Some(sync_callback) = sync_callback { + self.sync_callback.set(sync_callback).unwrap(); } // Enable the TCP protocols. @@ -549,13 +565,8 @@ impl Gateway { /// Removes the connected peer and adds them to the candidate peers. fn remove_connected_peer(&self, peer_ip: SocketAddr) { // Remove the peer from the sync module. 
Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { - let tx_block_sync_remove_peer_ = sync_sender.tx_block_sync_remove_peer.clone(); - tokio::spawn(async move { - if let Err(e) = tx_block_sync_remove_peer_.send(peer_ip).await { - warn!("Unable to remove '{peer_ip}' from the sync module - {e}"); - } - }); + if let Some(cb) = &*self.sync_callback.get_ref() { + cb.remove_peer(peer_ip); } // Removes the bidirectional map between the listener address and (ambiguous) peer address. self.resolver.remove_peer(peer_ip); @@ -693,7 +704,7 @@ impl Gateway { } Event::BlockResponse(block_response) => { // Process the block response. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = self.sync_callback.get() { // Retrieve the block response. let BlockResponse { request, blocks } = block_response; @@ -719,8 +730,8 @@ impl Gateway { // Ensure the block response is well-formed. blocks.ensure_response_is_well_formed(peer_ip, request.start_height, request.end_height)?; // Send the blocks to the sync module. - if let Err(e) = sync_sender.advance_with_sync_blocks(peer_ip, blocks.0).await { - warn!("Unable to process block response from '{peer_ip}' - {e}"); + if let Err(err) = cb.insert_block_response(peer_ip, blocks.0) { + warn!("Unable to process block response from '{peer_ip}': {err}"); } } Ok(()) @@ -728,18 +739,18 @@ impl Gateway { Event::CertificateRequest(certificate_request) => { // Send the certificate request to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate request to the sync module. 
- let _ = sync_sender.tx_certificate_request.send((peer_ip, certificate_request)).await; + cb.send_certificate_response(peer_ip, certificate_request); } Ok(()) } Event::CertificateResponse(certificate_response) => { // Send the certificate response to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate response to the sync module. - let _ = sync_sender.tx_certificate_response.send((peer_ip, certificate_response)).await; + cb.finish_certificate_request(peer_ip, certificate_response); } Ok(()) } @@ -759,9 +770,9 @@ impl Gateway { } // Update the peer locators. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Check the block locators are valid, and update the validators in the sync module. - if let Err(error) = sync_sender.update_peer_locators(peer_ip, block_locators).await { + if let Err(error) = cb.update_peer_locators(peer_ip, block_locators) { bail!("Validator '{peer_ip}' sent invalid block locators - {error}"); } } @@ -944,9 +955,11 @@ impl Gateway { pub async fn shut_down(&self) { info!("Shutting down the gateway..."); // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Close the listener. self.tcp.shut_down().await; + // Remove the sync callback (so it can be dropped). + self.sync_callback.clear(); } } diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1370ee33de..8dfa1229f9 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -13,19 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::events::{ - BatchPropose, - BatchSignature, - CertificateRequest, - CertificateResponse, - TransmissionRequest, - TransmissionResponse, -}; -use snarkos_node_sync::locators::BlockLocators; +use crate::events::{BatchPropose, BatchSignature, TransmissionRequest, TransmissionResponse}; use snarkvm::{ console::network::*, ledger::{ - block::{Block, Transaction}, + block::Transaction, narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, @@ -228,77 +220,3 @@ pub fn init_worker_channels() -> (WorkerSender, WorkerReceiver (sender, receiver) } - -#[derive(Debug)] -pub struct SyncSender { - pub tx_block_sync_advance_with_sync_blocks: mpsc::Sender<(SocketAddr, Vec>, oneshot::Sender>)>, - pub tx_block_sync_remove_peer: mpsc::Sender, - pub tx_block_sync_update_peer_locators: mpsc::Sender<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub tx_certificate_request: mpsc::Sender<(SocketAddr, CertificateRequest)>, - pub tx_certificate_response: mpsc::Sender<(SocketAddr, CertificateResponse)>, -} - -impl SyncSender { - /// Sends the request to update the peer locators. - pub async fn update_peer_locators(&self, peer_ip: SocketAddr, block_locators: BlockLocators) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to update the peer locators. - // This `tx_block_sync_update_peer_locators.send()` call - // causes the `rx_block_sync_update_peer_locators.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_update_peer_locators.send((peer_ip, block_locators, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the request to advance with sync blocks. - pub async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Initialize a callback sender and receiver. 
- let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to advance with sync blocks. - // This `tx_block_sync_advance_with_sync_blocks.send()` call - // causes the `rx_block_sync_advance_with_sync_blocks.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_advance_with_sync_blocks.send((peer_ip, blocks, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -#[derive(Debug)] -pub struct SyncReceiver { - pub rx_block_sync_advance_with_sync_blocks: - mpsc::Receiver<(SocketAddr, Vec>, oneshot::Sender>)>, - pub rx_block_sync_remove_peer: mpsc::Receiver, - pub rx_block_sync_update_peer_locators: mpsc::Receiver<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub rx_certificate_request: mpsc::Receiver<(SocketAddr, CertificateRequest)>, - pub rx_certificate_response: mpsc::Receiver<(SocketAddr, CertificateResponse)>, -} - -/// Initializes the sync channels. -pub fn init_sync_channels() -> (SyncSender, SyncReceiver) { - let (tx_block_sync_advance_with_sync_blocks, rx_block_sync_advance_with_sync_blocks) = - mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_remove_peer, rx_block_sync_remove_peer) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_update_peer_locators, rx_block_sync_update_peer_locators) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_request, rx_certificate_request) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_response, rx_certificate_response) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = SyncSender { - tx_block_sync_advance_with_sync_blocks, - tx_block_sync_remove_peer, - tx_block_sync_update_peer_locators, - tx_certificate_request, - tx_certificate_response, - }; - let receiver = SyncReceiver { - rx_block_sync_advance_with_sync_blocks, - rx_block_sync_remove_peer, - rx_block_sync_update_peer_locators, - rx_certificate_request, - rx_certificate_response, - }; - - (sender, receiver) -} diff --git 
a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 7d9dd7f531..9efba541c3 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -54,6 +54,9 @@ pub use telemetry::*; pub mod timestamp; pub use timestamp::*; +use anyhow::{Result, bail}; +use parking_lot::{RwLock, RwLockReadGuard}; + /// Formats an ID into a truncated identifier (for logging purposes). pub fn fmt_id(id: impl ToString) -> String { let id = id.to_string(); @@ -63,3 +66,57 @@ pub fn fmt_id(id: impl ToString) -> String { } formatted_id } + +/// Helper struct to hold a reference to a callback struct. +pub struct CallbackHandle { + callback: RwLock>, +} + +impl Default for CallbackHandle { + /// By default, the handle holds no callback. + fn default() -> Self { + Self { callback: RwLock::new(None) } + } +} + +impl CallbackHandle { + /// Set a callback. Returns an error if a callback was already set. + pub fn set(&self, callback: C) -> Result<()> { + let prev = self.callback.write().replace(callback); + + if prev.is_some() { + bail!("Callback was already set"); + } + + Ok(()) + } + + /// Get a cloned copy of the callback. + /// Useful when the callback will be used across await-boundaries. + #[inline] + pub fn get(&self) -> Option { + self.callback.read().clone() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(feature = "locktick")] + #[inline] + pub fn get_ref(&self) -> RwLockReadGuard<'_, Option> { + self.callback.read() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(not(feature = "locktick"))] + #[inline] + pub fn get_ref(&self) -> RwLockReadGuard<'_, Option> { + self.callback.read() + } + + /// Remove the callback. + /// Used during shutdown to resolve circular dependencies between types. 
+ pub fn clear(&self) { + let _ = self.callback.write().take(); + } +} diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 41e0b50083..72a86574ae 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -35,7 +35,6 @@ use crate::{ assign_to_worker, assign_to_workers, fmt_id, - init_sync_channels, init_worker_channels, now, }, @@ -235,16 +234,14 @@ impl Primary { // Set the workers. self.workers = Arc::from(workers); - // First, initialize the sync channels. - let (sync_sender, sync_receiver) = init_sync_channels(); // Next, initialize the sync module and sync the storage from ledger. self.sync.initialize(bft_sender).await?; // Next, load and process the proposal cache before running the sync module. self.load_proposal_cache().await?; // Next, run the sync module. - self.sync.run(ping, sync_receiver).await?; + self.sync.run(ping).await?; // Next, initialize the gateway. - self.gateway.run(primary_sender, worker_senders, Some(sync_sender)).await; + self.gateway.run(primary_sender, worker_senders, Some(Arc::new(self.sync.clone()))).await; // Lastly, start the primary handlers. // Note: This ensures the primary does not start communicating before syncing is complete. 
self.start_handlers(primary_receiver); diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 85ad6c0a7e..e6540c9118 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -19,7 +19,8 @@ use crate::{ PRIMARY_PING_IN_MS, Transport, events::DataBlocks, - helpers::{BFTSender, Pending, Storage, SyncReceiver, fmt_id, max_redundant_requests}, + gateway::SyncCallback as GatewaySyncCallback, + helpers::{BFTSender, Pending, Storage, fmt_id, max_redundant_requests}, spawn_blocking, }; use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; @@ -44,19 +45,20 @@ use std::{ future::Future, net::SocketAddr, sync::Arc, - time::Duration, + time::{Duration, Instant}, }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; use tokio::{ sync::{OnceCell, oneshot}, task::JoinHandle, + time::{sleep, timeout}, }; /// Block synchronization logic for validators. /// /// Synchronization works differently for nodes that act as validators in AleoBFT; -/// In the common case, validators generate blocks after receiving an anchor block that has been accepted +/// In the common case, validators generate blocks after receiving an anchor certificate that has been accepted /// by a supermajority of the committee instead of fetching entire blocks from other nodes. /// However, if a validator does not have an up-to-date DAG, it might still fetch entire blocks from other nodes. /// @@ -96,6 +98,8 @@ pub struct Sync { } impl Sync { + const SYNC_INTERVAL: Duration = Duration::from_millis(PRIMARY_PING_IN_MS); + /// Initializes a new sync instance. pub fn new( gateway: Gateway, @@ -162,7 +166,7 @@ impl Sync { /// /// When this function returns successfully, the sync module will have spawned background tasks /// that fetch blocks from other validators. 
- pub async fn run(&self, ping: Option>>, sync_receiver: SyncReceiver) -> Result<()> { + pub async fn run(&self, ping: Option>>) -> Result<()> { info!("Starting the sync module..."); // Start the block sync loop. @@ -172,10 +176,18 @@ impl Sync { // Ideally, a node does not consider itself synced when it has not received // any block locators from peers. However, in the initial bootup of validators, // this needs to happen, so we use this additional sleep as a grace period. - tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + + let mut last_update = Instant::now(); loop { - // Sleep briefly to avoid triggering spam detection. - tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + // Make sure we do not sync too often + let now = Instant::now(); + let elapsed = now.saturating_duration_since(last_update); + let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); + + if !sleep_time.is_zero() { + sleep(sleep_time).await; + } let new_blocks = self_.try_block_sync().await; if new_blocks { @@ -186,6 +198,7 @@ impl Sync { } } } + last_update = now; } }); @@ -194,7 +207,7 @@ impl Sync { self.spawn(async move { loop { // Sleep briefly. - tokio::time::sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; + sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; // Remove the expired pending transmission requests. let self__ = self_.clone(); @@ -205,78 +218,6 @@ impl Sync { } }); - /* Set up callbacks for events from the Gateway */ - - // Retrieve the sync receiver. - let SyncReceiver { - mut rx_block_sync_advance_with_sync_blocks, - mut rx_block_sync_remove_peer, - mut rx_block_sync_update_peer_locators, - mut rx_certificate_request, - mut rx_certificate_response, - } = sync_receiver; - - // Process the block sync request to advance with sync blocks. 
- // Each iteration of this loop is triggered by an incoming [`BlockResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::advance_with_sync_blocks()`], - // which calls [`tx_block_sync_advance_with_sync_blocks.send()`], - // which causes the `rx_block_sync_advance_with_sync_blocks.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, blocks, callback)) = rx_block_sync_advance_with_sync_blocks.recv().await { - callback.send(self_.advance_with_sync_blocks(peer_ip, blocks).await).ok(); - } - }); - - // Process the block sync request to remove the peer. - let self_ = self.clone(); - self.spawn(async move { - while let Some(peer_ip) = rx_block_sync_remove_peer.recv().await { - self_.remove_peer(peer_ip); - } - }); - - // Process each block sync request to update peer locators. - // Each iteration of this loop is triggered by an incoming [`PrimaryPing`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::update_peer_locators()`], - // which calls [`tx_block_sync_update_peer_locators.send()`], - // which causes the `rx_block_sync_update_peer_locators.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, locators, callback)) = rx_block_sync_update_peer_locators.recv().await { - let self_clone = self_.clone(); - tokio::spawn(async move { - callback.send(self_clone.update_peer_locators(peer_ip, locators)).ok(); - }); - } - }); - - // Process each certificate request. - // Each iteration of this loop is triggered by an incoming [`CertificateRequest`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_request.send()`], - // which causes the `rx_certificate_request.recv()` call below to return. 
- let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_request)) = rx_certificate_request.recv().await { - self_.send_certificate_response(peer_ip, certificate_request); - } - }); - - // Process each certificate response. - // Each iteration of this loop is triggered by an incoming [`CertificateResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_response.send()`], - // which causes the `rx_certificate_response.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_response)) = rx_certificate_response.recv().await { - self_.finish_certificate_request(peer_ip, certificate_response); - } - }); - Ok(()) } @@ -295,6 +236,9 @@ impl Sync { self.send_block_requests(sync_peers, requests).await; } + // Wait for updates or a timeout. + let _ = timeout(Self::SYNC_INTERVAL, self.block_sync.wait_for_update()).await; + // Do not attempt to sync if there are no blocks to sync. // This prevents redundant log messages and performing unnecessary computation. if !self.block_sync.can_block_sync() { @@ -316,22 +260,19 @@ impl Sync { } } } + + /// Test-only. Manually add peer locators. + #[cfg(test)] + pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { + self.update_peer_locators(peer_ip, locators) + } } // Callbacks used when receiving messages from the Gateway -impl Sync { +impl GatewaySyncCallback for Sync { /// We received a block response and can (possibly) advance synchronization. - async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Verify that the response is valid and add it to block sync. - self.block_sync.insert_block_responses(peer_ip, blocks)?; - - // Try to process responses stored in BlockSync. 
- // Note: Do not call `self.block_sync.try_advancing_block_synchronziation` here as it will process - // and remove any completed requests, which means the call to `sync_storage_with_blocks` will not process - // them as expected. - self.try_advancing_block_synchronization().await?; - - Ok(()) + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { + self.block_sync.insert_block_responses(peer_ip, blocks) } /// We received new peer locators during a Ping. @@ -344,9 +285,30 @@ impl Sync { self.block_sync.remove_peer(&peer_ip); } - #[cfg(test)] - pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { - self.update_peer_locators(peer_ip, locators) + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { + // Attempt to retrieve the certificate. + if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { + // Send the certificate response to the peer. + let self_ = self.clone(); + tokio::spawn(async move { + let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; + }); + } + } + + /// Handles the incoming certificate response. + /// This method ensures the certificate response is well-formed and matches the certificate ID. + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { + let certificate = response.certificate; + // Check if the peer IP exists in the pending queue for the given certificate ID. + let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); + // If the peer IP exists, finish the pending request. + if exists { + // TODO: Validate the certificate. + // Remove the certificate ID from the pending queue. 
+ self.pending.remove(certificate.id(), Some(certificate)); + } } } @@ -896,32 +858,6 @@ impl Sync { Err(e) => bail!("Unable to fetch certificate {} - (timeout) {e}", fmt_id(certificate_id)), } } - - /// Handles the incoming certificate request. - fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { - // Attempt to retrieve the certificate. - if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { - // Send the certificate response to the peer. - let self_ = self.clone(); - tokio::spawn(async move { - let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; - }); - } - } - - /// Handles the incoming certificate response. - /// This method ensures the certificate response is well-formed and matches the certificate ID. - fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { - let certificate = response.certificate; - // Check if the peer IP exists in the pending queue for the given certificate ID. - let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); - // If the peer IP exists, finish the pending request. - if exists { - // TODO: Validate the certificate. - // Remove the certificate ID from the pending queue. - self.pending.remove(certificate.id(), Some(certificate)); - } - } } impl Sync { diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index 4eb03f5e94..ffc0041599 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -269,7 +269,7 @@ impl> Client { break; } - // Make sure we do not sync too often + // Make sure we do not sync too often. let now = Instant::now(); let elapsed = now.saturating_duration_since(last_update); let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); @@ -285,9 +285,9 @@ impl> Client { })); } - /// Client-side version of `snarkvm_node_bft::Sync::try_block_sync()`. + /// Client-side version of `snarkos_node_bft::Sync::try_block_sync()`. 
async fn try_block_sync(&self) { - // Sleep briefly to avoid triggering spam detection. + // Wait for updates or a timeout. let _ = timeout(Self::SYNC_INTERVAL, self.sync.wait_for_update()).await; // For sanity, check that sync height is never below ledger height. From da63d23e4f55b73490f505833d2812ef7f33c048 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 16:25:28 -0700 Subject: [PATCH 02/16] misc(bft): get rid of bft channels --- Cargo.lock | 171 ++++++++++++++++--------------- Cargo.toml | 4 +- build.rs | 3 +- node/bft/examples/simple_node.rs | 2 +- node/bft/src/bft.rs | 166 ++++++++++-------------------- node/bft/src/gateway.rs | 7 +- node/bft/src/helpers/channels.rs | 63 ------------ node/bft/src/helpers/mod.rs | 8 +- node/bft/src/lib.rs | 2 +- node/bft/src/primary.rs | 81 +++++++-------- node/bft/src/sync/mod.rs | 68 +++++++----- node/bft/src/worker.rs | 2 +- node/bft/tests/common/primary.rs | 2 +- node/consensus/src/lib.rs | 31 ++++-- 14 files changed, 269 insertions(+), 341 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d57e13b77e..ffdde8204c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,7 @@ dependencies = [ "cookie", "document-features", "idna", - "indexmap 2.11.1", + "indexmap 2.11.3", "log", "serde", "serde_derive", @@ -1454,7 +1454,7 @@ dependencies = [ "js-sys", "libc", "r-efi", - "wasi 0.14.6+wasi-0.2.4", + "wasi 0.14.7+wasi-0.2.4", "wasm-bindgen", ] @@ -1518,7 +1518,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.11.1", + "indexmap 2.11.3", "slab", "tokio", "tokio-util", @@ -1944,14 +1944,15 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.1" +version = "2.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921" +checksum = "92119844f513ffa41556430369ab02c295a3578af21cf945caa3e9e0c2481ac3" dependencies = [ "equivalent", "hashbrown 0.15.5", "rayon", "serde", + "serde_core", 
] [[package]] @@ -2083,9 +2084,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.78" +version = "0.3.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +checksum = "6247da8b8658ad4e73a186e747fcc5fc2a29f979d6fe6269127fdb5fd08298d0" dependencies = [ "once_cell", "wasm-bindgen", @@ -2234,6 +2235,16 @@ name = "locktick" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307f02aff720d58003290879abe635b818b2176488c5ba2855ab9c11b4e0c04e" +dependencies = [ + "backtrace", + "parking_lot", + "simple_moving_average", +] + +[[package]] +name = "locktick" +version = "0.3.0" +source = "git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard#014d3940ae8c52e860d7ed539f8c3e9452b769ff" dependencies = [ "backtrace", "parking_lot", @@ -2321,7 +2332,7 @@ dependencies = [ "base64 0.21.7", "hyper 0.14.32", "hyper-tls 0.5.0", - "indexmap 2.11.1", + "indexmap 2.11.3", "ipnet", "metrics", "metrics-util", @@ -3477,9 +3488,9 @@ checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" -version = "1.0.224" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b" +checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d" dependencies = [ "serde_core", "serde_derive", @@ -3487,18 +3498,18 @@ dependencies = [ [[package]] name = "serde_core" -version = "1.0.224" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab" +checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.224" +version = "1.0.225" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0" +checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -3511,7 +3522,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "itoa", "memchr", "ryu", @@ -3561,7 +3572,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.11.1", + "indexmap 2.11.3", "schemars 0.9.0", "schemars 1.0.4", "serde", @@ -3720,7 +3731,7 @@ version = "4.2.1" dependencies = [ "built", "clap", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rusty-hook", "snarkos-account", "snarkos-cli", @@ -3759,8 +3770,8 @@ dependencies = [ "clap", "colored 3.0.0", "crossterm 0.29.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "nix", "num_cpus", "parking_lot", @@ -3811,8 +3822,8 @@ dependencies = [ "deadline", "futures-util", "http 1.3.1", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "num_cpus", "once_cell", @@ -3853,9 +3864,9 @@ dependencies = [ "colored 3.0.0", "deadline", "futures", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.12.1", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "mockall", "open", @@ -3894,7 +3905,7 @@ version = "4.2.1" dependencies = [ "anyhow", "bytes", - "indexmap 2.11.1", + "indexmap 2.11.3", "proptest", "serde", "snarkos-node-sync-locators", @@ -3911,8 +3922,8 @@ version = "4.2.1" dependencies = [ "anyhow", "async-trait", - "indexmap 
2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rand 0.8.5", "rayon", @@ -3928,8 +3939,8 @@ version = "4.2.1" dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "parking_lot", "snarkvm", @@ -3944,7 +3955,7 @@ dependencies = [ "bincode", "colored 3.0.0", "http 1.3.1", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rayon", "reqwest", @@ -3964,9 +3975,9 @@ dependencies = [ "aleo-std", "anyhow", "colored 3.0.0", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "once_cell", "parking_lot", @@ -3986,7 +3997,7 @@ dependencies = [ name = "snarkos-node-metrics" version = "4.2.1" dependencies = [ - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "metrics-exporter-prometheus", "parking_lot", "rayon", @@ -4004,9 +4015,9 @@ dependencies = [ "base64 0.22.1", "built", "http 1.3.1", - "indexmap 2.11.1", + "indexmap 2.11.3", "jsonwebtoken", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "once_cell", "parking_lot", "rand 0.8.5", @@ -4039,7 +4050,7 @@ dependencies = [ "futures", "futures-util", "linked-hash-map", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "peak_alloc", "rand 0.8.5", @@ -4082,9 +4093,9 @@ version = "4.2.1" dependencies = [ "anyhow", "futures", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 
(git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rand 0.8.5", "serde", @@ -4113,7 +4124,7 @@ name = "snarkos-node-sync-locators" version = "4.2.1" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "serde", "snarkvm", "tracing", @@ -4126,7 +4137,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "once_cell", "parking_lot", "snarkos-node-metrics", @@ -4173,7 +4184,7 @@ dependencies = [ "fxhash", "hashbrown 0.15.5", "hex", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", "num-traits", "rand 0.8.5", @@ -4249,7 +4260,7 @@ name = "snarkvm-circuit-environment" version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", "nom", "num-traits", @@ -4441,7 +4452,7 @@ source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be# dependencies = [ "anyhow", "enum-iterator", - "indexmap 2.11.1", + "indexmap 2.11.3", "lazy_static", "paste", "serde", @@ -4480,7 +4491,7 @@ dependencies = [ "enum-iterator", "enum_index", "enum_index_derive", - "indexmap 2.11.1", + "indexmap 2.11.3", "num-derive", "num-traits", "serde_json", @@ -4618,8 +4629,8 @@ source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be# dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4656,7 +4667,7 @@ version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4678,7 
+4689,7 @@ version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "proptest", "rand 0.8.5", "rand_chacha 0.3.1", @@ -4709,7 +4720,7 @@ name = "snarkvm-ledger-narwhal-batch-certificate" version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4722,7 +4733,7 @@ name = "snarkvm-ledger-narwhal-batch-header" version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4746,7 +4757,7 @@ name = "snarkvm-ledger-narwhal-subdag" version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4786,8 +4797,8 @@ dependencies = [ "aleo-std", "anyhow", "bincode", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4806,8 +4817,8 @@ dependencies = [ "aleo-std", "anyhow", "colored 3.0.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4846,8 +4857,8 @@ dependencies = [ "aleo-std-storage", "anyhow", "bincode", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "rayon", "rocksdb", @@ -4903,7 +4914,7 @@ dependencies = [ "curl", "hex", 
"lazy_static", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "paste", "rand 0.8.5", @@ -4921,9 +4932,9 @@ source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be# dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4954,8 +4965,8 @@ source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be# dependencies = [ "aleo-std", "colored 3.0.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "rand 0.8.5", "rand_chacha 0.3.1", @@ -4977,7 +4988,7 @@ name = "snarkvm-synthesizer-program" version = "4.2.1" source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "paste", "rand 0.8.5", "rand_chacha 0.3.1", @@ -5558,7 +5569,7 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae2a4cf385da23d1d53bc15cdfa5c2109e93d8d362393c801e87da2f72f0e201" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "serde_core", "serde_spanned", "toml_datetime", @@ -5965,9 +5976,9 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.14.6+wasi-0.2.4" +version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f71243a3f320c00a8459e455c046ce571229c2f31fd11645d9dc095e3068ca0" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" dependencies = [ "wasip2", ] @@ -5983,9 +5994,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.101" +version = "0.2.102" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +checksum = "4ad224d2776649cfb4f4471124f8176e54c1cca67a88108e30a0cd98b90e7ad3" dependencies = [ "cfg-if", "once_cell", @@ -5996,9 +6007,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.101" +version = "0.2.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" +checksum = "3a1364104bdcd3c03f22b16a3b1c9620891469f5e9f09bc38b2db121e593e732" dependencies = [ "bumpalo", "log", @@ -6010,9 +6021,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.51" +version = "0.4.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca85039a9b469b38336411d6d6ced91f3fc87109a2a27b0c197663f5144dffe" +checksum = "9c0a08ecf5d99d5604a6666a70b3cde6ab7cc6142f5e641a8ef48fc744ce8854" dependencies = [ "cfg-if", "js-sys", @@ -6023,9 +6034,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.101" +version = "0.2.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +checksum = "0d7ab4ca3e367bb1ed84ddbd83cc6e41e115f8337ed047239578210214e36c76" dependencies = [ "quote 1.0.40", "wasm-bindgen-macro-support", @@ -6033,9 +6044,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.101" +version = "0.2.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +checksum = "4a518014843a19e2dbbd0ed5dfb6b99b23fb886b14e6192a00803a3e14c552b0" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -6046,18 +6057,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.101" +version = "0.2.102" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +checksum = "255eb0aa4cc2eea3662a00c2bbd66e93911b7361d5e0fcd62385acfd7e15dcee" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.78" +version = "0.3.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e4b637749ff0d92b8fad63aa1f7cff3cbe125fd49c175cd6345e7272638b12" +checksum = "50462a022f46851b81d5441d1a6f5bac0b21a1d72d64bd4906fbdd4bf7230ec7" dependencies = [ "js-sys", "wasm-bindgen", @@ -6548,7 +6559,7 @@ dependencies = [ "crossbeam-utils", "displaydoc", "flate2", - "indexmap 2.11.1", + "indexmap 2.11.3", "memchr", "thiserror 2.0.16", "time", diff --git a/Cargo.toml b/Cargo.toml index e922558d34..2c332825c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,7 +94,9 @@ version = "0.3" version = "0.3" [workspace.dependencies.locktick] -version = "0.3" +#version = "0.3" +git = "https://github.com/kaimast/locktick.git" +branch = "fix/export-lock-guard" [workspace.dependencies.lru] version = "0.16" diff --git a/build.rs b/build.rs index a807f82d0b..e5f5c9edcc 100644 --- a/build.rs +++ b/build.rs @@ -126,8 +126,9 @@ fn check_locktick_imports>(path: P) { } // If the file has a lock import "imbalance", print it out and increment the counter. + // Allow having more locktick, than regular, imports. assert!( - lock_balance == 0, + lock_balance <= 0, "The locks in \"{}\" don't seem to have `locktick` counterparts!", entry.path().display() ); diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 6001617668..0dbb43665c 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -198,7 +198,7 @@ pub async fn start_primary( None, )?; // Run the primary instance. - primary.run(None, None, sender.clone(), receiver).await?; + primary.run(None, None, None, sender.clone(), receiver).await?; // Handle OS signals. 
handle_signals(&primary); // Return the primary instance. diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 2d878d7bfb..b497304e6f 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,18 +15,9 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - Primary, - helpers::{ - BFTReceiver, - ConsensusSender, - DAG, - PrimaryReceiver, - PrimarySender, - Storage, - fmt_id, - init_bft_channels, - now, - }, + helpers::{ConsensusSender, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + primary::{Primary, PrimaryCallback}, + sync::SyncCallback, }; use snarkos_account::Account; use snarkos_node_bft_ledger_service::LedgerService; @@ -46,15 +37,11 @@ use aleo_std::StorageMode; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::{Mutex, RwLock}, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::RwLock, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] -use parking_lot::{Mutex, RwLock}; +use parking_lot::RwLock; use std::{ collections::{BTreeMap, HashSet}, - future::Future, net::SocketAddr, sync::{ Arc, @@ -63,10 +50,7 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{ - sync::{OnceCell, oneshot}, - task::JoinHandle, -}; +use tokio::sync::{OnceCell, oneshot}; #[derive(Clone)] pub struct BFT { @@ -80,8 +64,6 @@ pub struct BFT { leader_certificate_timer: Arc, /// The consensus sender. consensus_sender: Arc>>, - /// Handles for all spawned tasks. - handles: Arc>>>, /// The BFT lock. lock: Arc>, } @@ -105,7 +87,6 @@ impl BFT { leader_certificate: Default::default(), leader_certificate_timer: Default::default(), consensus_sender: Default::default(), - handles: Default::default(), lock: Default::default(), }) } @@ -122,14 +103,16 @@ impl BFT { primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the BFT instance..."); - // Initialize the BFT channels. 
- let (bft_sender, bft_receiver) = init_bft_channels::(); - // First, start the BFT handlers. - self.start_handlers(bft_receiver); + // Set up callbacks. + let primary_callback = Some(Arc::new(self.clone()) as Arc>); + + let sync_callback = Some(Arc::new(self.clone()) as Arc>); + // Next, run the primary instance. - self.primary.run(ping, Some(bft_sender), primary_sender, primary_receiver).await?; + self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; + // Lastly, set the consensus sender. - // Note: This ensures during initial syncing, that the BFT does not advance the ledger. + // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. if let Some(consensus_sender) = consensus_sender { self.consensus_sender.set(consensus_sender).expect("Consensus sender already set"); } @@ -211,8 +194,9 @@ impl BFT { } } -impl BFT { - /// Stores the certificate in the DAG, and attempts to commit one or more anchors. +#[async_trait::async_trait] +impl PrimaryCallback for BFT { + /// Notification that a new round has started. fn update_to_next_round(&self, current_round: u64) -> bool { // Ensure the current round is at least the storage round (this is a sanity check). let storage_round = self.storage().current_round(); @@ -280,6 +264,41 @@ impl BFT { is_ready } + /// Notification about a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. + self.update_dag::(certificate).await + } +} + +#[async_trait::async_trait] +impl SyncCallback for BFT { + /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** + /// already exist in the ledger. + /// + /// This method commits all the certificates into the DAG. 
+ /// Note that there is no need to insert the certificates into the DAG, because these certificates + /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()> { + // Acquire the BFT write lock. + let mut dag = self.dag.write(); + + // Commit all the certificates. + for certificate in certificates { + dag.commit(&certificate, self.storage().max_gc_rounds()); + } + + Ok(()) + } + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. + self.update_dag::(certificate).await + } +} + +impl BFT { /// Updates the leader certificate to the current even round, /// returning `true` if the BFT is ready to update to the next round. /// @@ -855,77 +874,6 @@ impl BFT { } impl BFT { - /// Starts the BFT handlers. - fn start_handlers(&self, bft_receiver: BFTReceiver) { - let BFTReceiver { - mut rx_primary_round, - mut rx_primary_certificate, - mut rx_sync_bft_dag_at_bootup, - mut rx_sync_bft, - } = bft_receiver; - - // Process the current round from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((current_round, callback)) = rx_primary_round.recv().await { - callback.send(self_.update_to_next_round(current_round)).ok(); - } - }); - - // Process the certificate from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_primary_certificate.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - - // Process the request to sync the BFT DAG at bootup. 
- let self_ = self.clone(); - self.spawn(async move { - while let Some(certificates) = rx_sync_bft_dag_at_bootup.recv().await { - self_.sync_bft_dag_at_bootup(certificates).await; - } - }); - - // Handler for new certificates that were fetched by the sync module. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_sync_bft.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - } - - /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** - /// already exist in the ledger. - /// - /// This method commits all the certificates into the DAG. - /// Note that there is no need to insert the certificates into the DAG, because these certificates - /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. - async fn sync_bft_dag_at_bootup(&self, certificates: Vec>) { - // Acquire the BFT write lock. - let mut dag = self.dag.write(); - - // Commit all the certificates. - for certificate in certificates { - dag.commit(&certificate, self.storage().max_gc_rounds()); - } - } - - /// Spawns a task with the given future; it should only be used for long-running tasks. - fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); - } - /// Shuts down the BFT. pub async fn shut_down(&self) { info!("Shutting down the BFT..."); @@ -933,14 +881,12 @@ impl BFT { let _lock = self.lock.lock().await; // Shut down the primary. self.primary.shut_down().await; - // Abort the tasks. 
- self.handles.lock().iter().for_each(|handle| handle.abort()); } } #[cfg(test)] mod tests { - use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage}; + use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage, sync::SyncCallback}; use snarkos_account::Account; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -1524,7 +1470,7 @@ mod tests { let bootup_bft = initialize_bft(account.clone(), storage_2, ledger)?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(certificates.clone()).await.unwrap(); // Check that the BFT starts from the same last committed round. assert_eq!(bft.dag.read().last_committed_round(), bootup_bft.dag.read().last_committed_round()); @@ -1703,7 +1649,7 @@ mod tests { let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone())?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates to the storage and BFT with bootup. for certificate in post_shutdown_certificates.iter() { @@ -1883,7 +1829,7 @@ mod tests { // Insert a mock DAG in the BFT without bootup. *bootup_bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates into the storage. 
let mut post_shutdown_certificates: Vec> = diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index d09555fbad..092cd2312a 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -1570,12 +1570,15 @@ impl Gateway { #[cfg(test)] mod prop_tests { - use crate::{ + use super::{ Gateway, + prop_tests::GatewayAddress::{Dev, Prod}, + }; + + use crate::{ MAX_WORKERS, MEMORY_POOL_PORT, Worker, - gateway::prop_tests::GatewayAddress::{Dev, Prod}, helpers::{Storage, init_primary_channels, init_worker_channels}, }; use snarkos_account::Account; diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 8dfa1229f9..f14ed56f64 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -52,69 +52,6 @@ pub fn init_consensus_channels() -> (ConsensusSender, ConsensusRe (sender, receiver) } -/// "Interface" that enables, for example, sending data from storage to the the BFT logic. -#[derive(Clone, Debug)] -pub struct BFTSender { - pub tx_primary_round: mpsc::Sender<(u64, oneshot::Sender)>, - pub tx_primary_certificate: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, - pub tx_sync_bft_dag_at_bootup: mpsc::Sender>>, - pub tx_sync_bft: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, -} - -impl BFTSender { - /// Sends the current round to the BFT. - pub async fn send_primary_round_to_bft(&self, current_round: u64) -> Result { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the current round to the BFT. - self.tx_primary_round.send((current_round, callback_sender)).await?; - // Await the callback to continue. - Ok(callback_receiver.await?) - } - - /// Sends the batch certificate to the BFT. - pub async fn send_primary_certificate_to_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. 
- let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT. - self.tx_primary_certificate.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the batch certificates to the BFT for syncing. - pub async fn send_sync_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT for syncing. - self.tx_sync_bft.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -/// Receiving counterpart to `BFTSender` -#[derive(Debug)] -pub struct BFTReceiver { - pub rx_primary_round: mpsc::Receiver<(u64, oneshot::Sender)>, - pub rx_primary_certificate: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, - pub rx_sync_bft_dag_at_bootup: mpsc::Receiver>>, - pub rx_sync_bft: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, -} - -/// Initializes the BFT channels, and returns the sending and receiving ends. 
-pub fn init_bft_channels() -> (BFTSender, BFTReceiver) { - let (tx_primary_round, rx_primary_round) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_certificate, rx_primary_certificate) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft_dag_at_bootup, rx_sync_bft_dag_at_bootup) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft, rx_sync_bft) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = BFTSender { tx_primary_round, tx_primary_certificate, tx_sync_bft_dag_at_bootup, tx_sync_bft }; - let receiver = BFTReceiver { rx_primary_round, rx_primary_certificate, rx_sync_bft_dag_at_bootup, rx_sync_bft }; - - (sender, receiver) -} - #[derive(Clone, Debug)] pub struct PrimarySender { pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, diff --git a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 9efba541c3..756e501089 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -55,6 +55,12 @@ pub mod timestamp; pub use timestamp::*; use anyhow::{Result, bail}; +#[cfg(feature = "locktick")] +use locktick::{ + LockGuard, + parking_lot::{RwLock, RwLockReadGuard}, +}; +#[cfg(not(feature = "locktick"))] use parking_lot::{RwLock, RwLockReadGuard}; /// Formats an ID into a truncated identifier (for logging purposes). @@ -102,7 +108,7 @@ impl CallbackHandle { /// Cannot be shared across await-boundaries. 
#[cfg(feature = "locktick")] #[inline] - pub fn get_ref(&self) -> RwLockReadGuard<'_, Option> { + pub fn get_ref(&self) -> LockGuard>> { self.callback.read() } diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index c92032d4c7..a46598b0c0 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -35,7 +35,7 @@ mod bft; pub use bft::*; mod gateway; -pub use gateway::*; +pub use gateway::Gateway; mod primary; pub use primary::*; diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 72a86574ae..d190b1d0de 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -14,18 +14,16 @@ // limitations under the License. use crate::{ - Gateway, MAX_BATCH_DELAY_IN_MS, MAX_WORKERS, MIN_BATCH_DELAY_IN_SECS, PRIMARY_PING_IN_MS, - Sync, - Transport, WORKER_PING_IN_MS, Worker, events::{BatchPropose, BatchSignature, Event}, + gateway::{Gateway, Transport}, helpers::{ - BFTSender, + CallbackHandle, PrimaryReceiver, PrimarySender, Proposal, @@ -39,6 +37,7 @@ use crate::{ now, }, spawn_blocking, + sync::{Sync, SyncCallback}, }; use snarkos_account::Account; use snarkos_node_bft_events::PrimaryPing; @@ -58,6 +57,7 @@ use snarkvm::{ }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use futures::stream::{FuturesUnordered, StreamExt}; use indexmap::{IndexMap, IndexSet}; @@ -79,11 +79,22 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{sync::OnceCell, task::JoinHandle}; +use tokio::task::JoinHandle; /// A helper type for an optional proposed batch. pub type ProposedBatch = RwLock>>; +/// This callback trait allows listening to changes in the Primary, such as round advancement. +/// This is currently used by BFT. +#[async_trait::async_trait] +pub trait PrimaryCallback: Send + std::marker::Sync { + /// Notifies that a new round has started. + fn update_to_next_round(&self, current_round: u64) -> bool; + + /// Sends a new certificate. 
+ async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// The primary logic of a node. /// AleoBFT adopts a primary-worker architecture as described in the Narwhal and Tusk paper (Section 4.2). #[derive(Clone)] @@ -98,8 +109,8 @@ pub struct Primary { ledger: Arc>, /// The workers. workers: Arc<[Worker]>, - /// The BFT sender. - bft_sender: Arc>>, + /// The primary callback (used by [`BFT`]). + primary_callback: Arc>>>, /// The batch proposal, if the primary is currently proposing a batch. proposed_batch: Arc>, /// The timestamp of the most recent proposed batch. @@ -142,7 +153,7 @@ impl Primary { storage, ledger, workers: Arc::from(vec![]), - bft_sender: Default::default(), + primary_callback: Default::default(), proposed_batch: Default::default(), latest_proposed_batch_timestamp: Default::default(), signed_proposals: Default::default(), @@ -196,16 +207,16 @@ impl Primary { pub async fn run( &mut self, ping: Option>>, - bft_sender: Option>, + primary_callback: Option>>, + sync_callback: Option>>, primary_sender: PrimarySender, primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the primary instance of the memory pool..."); // Set the BFT sender. - if let Some(bft_sender) = &bft_sender { - // Set the BFT sender in the primary. - self.bft_sender.set(bft_sender.clone()).expect("BFT sender already set"); + if let Some(callback) = primary_callback { + self.primary_callback.set(callback)?; } // Construct a map of the worker senders. @@ -235,7 +246,7 @@ impl Primary { self.workers = Arc::from(workers); // Next, initialize the sync module and sync the storage from ledger. - self.sync.initialize(bft_sender).await?; + self.sync.initialize(sync_callback).await?; // Next, load and process the proposal cache before running the sync module. self.load_proposal_cache().await?; // Next, run the sync module. @@ -429,17 +440,12 @@ impl Primary { // Ensure the primary has not proposed a batch for this round before. 
if self.storage.contains_certificate_in_round_from(round, self.gateway.account().address()) { // If a BFT sender was provided, attempt to advance the current round. - if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(self.current_round()).await { + if let Some(cb) = &*self.primary_callback.get_ref() { + match cb.update_to_next_round(self.current_round()) { // 'is_ready' is true if the primary is ready to propose a batch for the next round. - Ok(true) => (), // continue, + true => (), // continue, // 'is_ready' is false if the primary is not ready to propose a batch for the next round. - Ok(false) => return Ok(()), - // An error occurred while attempting to advance the current round. - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } + false => return Ok(()), } } debug!("Primary is safely skipping {}", format!("(round {round} was already certified)").dimmed()); @@ -1537,14 +1543,8 @@ impl Primary { // Attempt to advance to the next round. if current_round < next_round { // If a BFT sender was provided, send the current round to the BFT. - let is_ready = if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(current_round).await { - Ok(is_ready) => is_ready, - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } - } + let is_ready = if let Some(cb) = self.primary_callback.get() { + cb.update_to_next_round(current_round) } // Otherwise, handle the Narwhal case. else { @@ -1631,12 +1631,11 @@ impl Primary { spawn_blocking!(storage.insert_certificate(certificate_, transmissions, Default::default()))?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { + if let Some(cb) = self.primary_callback.get() { // Await the callback to continue. 
- if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate.clone()).await { - warn!("Failed to update the BFT DAG from primary - {e}"); - return Err(e); - }; + cb.add_new_certificate(certificate.clone()) + .await + .with_context(|| "Failed to add new certificate from primary")?; } // Broadcast the certified batch to all validators. self.gateway.broadcast(Event::BatchCertified(certificate.clone().into())); @@ -1718,12 +1717,8 @@ impl Primary { spawn_blocking!(storage.insert_certificate(certificate_, missing_transmissions, Default::default()))?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { - // Send the certificate to the BFT. - if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate).await { - warn!("Failed to update the BFT DAG from sync: {e}"); - return Err(e); - }; + if let Some(cb) = self.primary_callback.get() { + cb.add_new_certificate(certificate).await.with_context(|| "Failed to update the DAG from sync")?; } } Ok(()) @@ -1934,10 +1929,12 @@ impl Primary { /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the primary..."); + // Remove the callback. + self.primary_callback.clear(); // Shut down the workers. self.workers.iter().for_each(|worker| worker.shut_down()); // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Save the current proposal cache to disk. let proposal_cache = { let proposal = self.proposed_batch.write().take(); diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index e6540c9118..c05f227f7e 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -14,13 +14,11 @@ // limitations under the License. 
use crate::{ - Gateway, MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, - Transport, events::DataBlocks, - gateway::SyncCallback as GatewaySyncCallback, - helpers::{BFTSender, Pending, Storage, fmt_id, max_redundant_requests}, + gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, + helpers::{Pending, Storage, fmt_id, max_redundant_requests}, spawn_blocking, }; use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; @@ -32,12 +30,15 @@ use snarkvm::{ prelude::{cfg_into_iter, cfg_iter}, }; -use anyhow::{Result, anyhow, bail}; +use anyhow::{Context, Result, anyhow, bail, ensure}; use indexmap::IndexMap; #[cfg(feature = "locktick")] -use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; +use locktick::{ + parking_lot::{Mutex, RwLock}, + tokio::Mutex as TMutex, +}; #[cfg(not(feature = "locktick"))] -use parking_lot::Mutex; +use parking_lot::{Mutex, RwLock}; #[cfg(not(feature = "serial"))] use rayon::prelude::*; use std::{ @@ -50,11 +51,21 @@ use std::{ #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; use tokio::{ - sync::{OnceCell, oneshot}, + sync::oneshot, task::JoinHandle, time::{sleep, timeout}, }; +/// This callback trait allows listening to synchronization updates, such as discorvering new `BatchCertificate`s. +/// This is currently used by BFT. +#[async_trait::async_trait] +pub trait SyncCallback: Send + std::marker::Sync { + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()>; + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// Block synchronization logic for validators. /// /// Synchronization works differently for nodes that act as validators in AleoBFT; @@ -79,8 +90,8 @@ pub struct Sync { block_sync: Arc>, /// The pending certificates queue. pending: Arc, BatchCertificate>>, - /// The BFT sender. - bft_sender: Arc>>, + /// The sync callback (used by [`BFT`]). 
+ sync_callback: Arc>>>>, /// Handles to the spawned background tasks. handles: Arc>>>, /// The response lock. @@ -114,7 +125,7 @@ impl Sync { ledger, block_sync, pending: Default::default(), - bft_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), response_lock: Default::default(), sync_lock: Default::default(), @@ -123,10 +134,11 @@ impl Sync { } /// Initializes the sync module and sync the storage with the ledger at bootup. - pub async fn initialize(&self, bft_sender: Option>) -> Result<()> { - // If a BFT sender was provided, set it. - if let Some(bft_sender) = bft_sender { - self.bft_sender.set(bft_sender).expect("BFT sender already set in gateway"); + pub async fn initialize(&self, sync_callback: Option>>) -> Result<()> { + // If a callback was provided, set it. + if let Some(callback) = sync_callback { + let prev = self.sync_callback.write().replace(callback); + ensure!(prev.is_none(), "Sync callback was already set"); } info!("Syncing storage with the ledger..."); @@ -138,6 +150,11 @@ impl Sync { Ok(()) } + /// Get the `SyncCallback` if one is set. + fn get_callback(&self) -> Option>> { + self.sync_callback.read().clone() + } + /// Sends the given batch of block requests to peers. /// /// Responses to block requests will eventually be processed by `Self::try_advancing_block_synchronization`. @@ -392,11 +409,9 @@ impl Sync { .collect::>(); // If a BFT sender was provided, send the certificates to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { + if let Some(cb) = self.get_callback() { // Await the callback to continue. 
- if let Err(e) = bft_sender.tx_sync_bft_dag_at_bootup.send(certificates).await { - bail!("Failed to update the BFT DAG from sync: {e}"); - } + cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; } self.block_sync.set_sync_height(block_height); @@ -630,11 +645,10 @@ impl Sync { for certificate in certificates { // If a BFT sender was provided, send the certificate to the BFT. // For validators, BFT spawns a receiver task in `BFT::start_handlers`. - if let Some(bft_sender) = self.bft_sender.get() { - // Await the callback to continue. - if let Err(err) = bft_sender.send_sync_bft(certificate).await { - bail!("Failed to sync certificate - {err}"); - }; + if let Some(cb) = self.get_callback() { + cb.add_new_certificate(certificate) + .await + .with_context(|| "Failed to sync certificate - {err}")?; } } } @@ -869,12 +883,14 @@ impl Sync { /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the sync module..."); + // Remove the callback. + let _ = self.sync_callback.write().take(); // Acquire the response lock. let _lock = self.response_lock.lock().await; // Acquire the sync lock. let _lock = self.sync_lock.lock().await; - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + // Abort all running tasks. 
+ self.handles.lock().drain(..).for_each(|handle| handle.abort()); } } diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 04c62ab6cd..df8b9d6aad 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -17,8 +17,8 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, MAX_WORKERS, ProposedBatch, - Transport, events::{Event, TransmissionRequest, TransmissionResponse}, + gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, spawn_blocking, }; diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index 3af72f1da7..ac847689f5 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -226,7 +226,7 @@ impl TestNetwork { bft.run(None, None, primary_sender, primary_receiver).await.unwrap(); } else { // Setup the channels and start the primary. - validator.primary.run(None, None, primary_sender, primary_receiver).await.unwrap(); + validator.primary.run(None, None, None, primary_sender, primary_receiver).await.unwrap(); } if let Some(interval_ms) = self.config.fire_transmissions { diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 80415491bf..3e3c34c0a3 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -53,7 +53,7 @@ use snarkvm::{ }; use aleo_std::StorageMode; -use anyhow::Result; +use anyhow::{Context, Result}; use colored::Colorize; use indexmap::IndexMap; #[cfg(feature = "locktick")] @@ -84,7 +84,7 @@ const MAX_DEPLOYMENTS_PER_INTERVAL: usize = 1; /// /// Consensus acts as a rate limiter to prevents workers in BFT from being overloaded. /// Each worker maintains a ready queue (which is essentially also a mempool), but verifies transactions/solutions -/// before enquing them. +/// before enqueuing them. /// Consensus only passes more transactions/solutions to the BFT layer if its ready queues are not already full. 
#[derive(Clone)] pub struct Consensus { @@ -501,8 +501,7 @@ impl Consensus { let result = spawn_blocking! { self_.try_advance_to_next_block(subdag, transmissions_) }; // If the block failed to advance, reinsert the transmissions into the memory pool. - if let Err(e) = &result { - error!("Unable to advance to the next block - {e}"); + if result.is_err() { // On failure, reinsert the transmissions into the memory pool. self.reinsert_transmissions(transmissions).await; } @@ -517,6 +516,8 @@ impl Consensus { subdag: Subdag, transmissions: IndexMap, Transmission>, ) -> Result<()> { + trace!("Trying to advance to new subdag anchored at round {}", subdag.anchor_round()); + #[cfg(feature = "metrics")] let start = subdag.leader_certificate().batch_header().timestamp(); #[cfg(feature = "metrics")] @@ -525,14 +526,20 @@ impl Consensus { let current_block_timestamp = self.ledger.latest_block().header().metadata().timestamp(); // Create the candidate next block. - let next_block = self.ledger.prepare_advance_to_next_quorum_block(subdag, transmissions)?; + let next_block = self + .ledger + .prepare_advance_to_next_quorum_block(subdag, transmissions) + .with_context(|| "Ledger preparation for advancement to next block failed")?; // Check that the block is well-formed. - self.ledger.check_next_block(&next_block)?; + self.ledger.check_next_block(&next_block).with_context(|| "Check for new block failed")?; // Advance to the next block. - self.ledger.advance_to_next_block(&next_block)?; + self.ledger.advance_to_next_block(&next_block).with_context(|| "Ledger advancement to new block failed")?; + + // Note: Do not return failure after this point, as the ledger already advanced. + #[cfg(feature = "telemetry")] // Fetch the latest committee - let latest_committee = self.ledger.current_committee()?; + let latest_committee = self.ledger.current_committee(); // If the next block starts a new epoch, clear the existing solutions. 
if next_block.height() % N::NUM_BLOCKS_PER_EPOCH == 0 { @@ -543,8 +550,10 @@ impl Consensus { } // Notify peers that we have a new block. - let locators = self.block_sync.get_block_locators()?; - self.ping.update_block_locators(locators); + match self.block_sync.get_block_locators() { + Ok(locators) => self.ping.update_block_locators(locators), + Err(err) => warn!("Failed to generate new block locators after block advancement: {err:?}"), + } // Make block sync aware of the new block. self.block_sync.set_sync_height(next_block.height()); @@ -571,7 +580,7 @@ impl Consensus { metrics::gauge(metrics::blocks::CUMULATIVE_PROOF_TARGET, cumulative_proof_target as f64); #[cfg(feature = "telemetry")] - { + if let Ok(latest_committee) = latest_committee { // Retrieve the latest participation scores. let participation_scores = self.bft().primary().gateway().validator_telemetry().get_participation_scores(&latest_committee); From b8d80449acc645eec1ef1c42b532801d6aa570c8 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 19:39:56 -0700 Subject: [PATCH 03/16] misc(consensus): replace consensus channel with a callback --- Cargo.lock | 1 + node/bft/examples/simple_node.rs | 50 +++++++++++++++-------------- node/bft/src/bft.rs | 55 ++++++++++++++++---------------- node/bft/src/helpers/channels.rs | 25 +-------------- node/bft/src/lib.rs | 2 +- node/bft/src/sync/mod.rs | 32 +++++++------------ node/consensus/Cargo.toml | 2 ++ node/consensus/src/lib.rs | 46 ++++++++++---------------- 8 files changed, 85 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ffdde8204c..b384f22c19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3974,6 +3974,7 @@ version = "4.2.1" dependencies = [ "aleo-std", "anyhow", + "async-trait", "colored 3.0.0", "indexmap 2.11.3", "itertools 0.14.0", diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 0dbb43665c..82b0cc4625 100644 --- a/node/bft/examples/simple_node.rs +++ 
b/node/bft/examples/simple_node.rs @@ -23,9 +23,10 @@ use aleo_std::StorageMode; use snarkos_account::Account; use snarkos_node_bft::{ BFT, + BftCallback, MEMORY_POOL_PORT, Primary, - helpers::{ConsensusReceiver, PrimarySender, Storage, init_consensus_channels, init_primary_channels}, + helpers::{PrimarySender, Storage, init_primary_channels}, }; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -37,7 +38,7 @@ use snarkvm::{ Ledger, block::Transaction, committee::{Committee, MIN_VALIDATOR_STAKE}, - narwhal::{BatchHeader, Data}, + narwhal::{BatchHeader, Data, Subdag, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, store::{ConsensusStore, helpers::memory::ConsensusMemory}, }, @@ -142,15 +143,13 @@ pub async fn start_bft( // Initialize the trusted validators. let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the consensus channels. - let (consensus_sender, consensus_receiver) = init_consensus_channels::(); - // Initialize the consensus receiver handler. - consensus_handler(consensus_receiver); + let consensus_handler = Arc::new(ConsensusHandler {}); // Initialize the BFT instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut bft = BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?; // Run the BFT instance. - bft.run(None, Some(consensus_sender), sender.clone(), receiver).await?; + bft.run(None, Some(consensus_handler), sender.clone(), receiver).await?; // Retrieve the BFT's primary. let primary = bft.primary(); // Handle OS signals. 
@@ -308,25 +307,28 @@ fn initialize_components(node_id: u16, num_nodes: u16) -> Result<(Committee) { - let ConsensusReceiver { mut rx_consensus_subdag } = receiver; +struct ConsensusHandler {} + +#[async_trait::async_trait] +impl BftCallback for ConsensusHandler { + async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()> { + // Determine the amount of time to sleep for the subdag. + let subdag_ms = subdag.values().flatten().count(); + // Determine the amount of time to sleep for the transmissions. + let transmissions_ms = transmissions.len() * 25; + // Add a constant delay. + let constant_ms = 100; + // Compute the total amount of time to sleep. + let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; + // Sleep for the determined amount of time. + tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - tokio::task::spawn(async move { - while let Some((subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - // Determine the amount of time to sleep for the subdag. - let subdag_ms = subdag.values().flatten().count(); - // Determine the amount of time to sleep for the transmissions. - let transmissions_ms = transmissions.len() * 25; - // Add a constant delay. - let constant_ms = 100; - // Compute the total amount of time to sleep. - let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; - // Sleep for the determined amount of time. - tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - // Call the callback. - callback.send(Ok(())).ok(); - } - }); + Ok(()) + } } /// Returns the trusted validators. 
diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index b497304e6f..6f8d80d620 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,7 +15,7 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - helpers::{ConsensusSender, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + helpers::{CallbackHandle, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, primary::{Primary, PrimaryCallback}, sync::SyncCallback, }; @@ -50,7 +50,16 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::sync::{OnceCell, oneshot}; + +#[async_trait::async_trait] +pub trait BftCallback: Send + std::marker::Sync { + /// Attempts to build a new block from the given subDAG, and (tries to) advance the legder to it. + async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()>; +} #[derive(Clone)] pub struct BFT { @@ -62,8 +71,8 @@ pub struct BFT { leader_certificate: Arc>>>, /// The timer for the leader certificate to be received. leader_certificate_timer: Arc, - /// The consensus sender. - consensus_sender: Arc>>, + /// The BFT callback (used by `Consensus`). + bft_callback: Arc>>>, /// The BFT lock. lock: Arc>, } @@ -86,7 +95,7 @@ impl BFT { dag: Default::default(), leader_certificate: Default::default(), leader_certificate_timer: Default::default(), - consensus_sender: Default::default(), + bft_callback: Default::default(), lock: Default::default(), }) } @@ -98,23 +107,22 @@ impl BFT { pub async fn run( &mut self, ping: Option>>, - consensus_sender: Option>, + bft_callback: Option>>, primary_sender: PrimarySender, primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the BFT instance..."); - // Set up callbacks. + // Set up callbacks to pass to the primary. let primary_callback = Some(Arc::new(self.clone()) as Arc>); - let sync_callback = Some(Arc::new(self.clone()) as Arc>); // Next, run the primary instance. 
self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; - // Lastly, set the consensus sender. - // Note: This ensures that, during initial syncing, that the BFT does not advance the ledger. - if let Some(consensus_sender) = consensus_sender { - self.consensus_sender.set(consensus_sender).expect("Consensus sender already set"); + // Lastly, set up callbacks for BFT itself. + // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. + if let Some(callback) = bft_callback { + self.bft_callback.set(callback)?; } Ok(()) } @@ -720,23 +728,12 @@ impl BFT { "BFT failed to commit - the subdag anchor round {anchor_round} does not match the leader round {leader_round}", ); - // Trigger consensus. - if let Some(consensus_sender) = self.consensus_sender.get() { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); + // Trigger the callback (if any). + if let Some(cb) = self.bft_callback.get() { // Send the subdag and transmissions to consensus. - consensus_sender.tx_consensus_subdag.send((subdag, transmissions, callback_sender)).await?; - // Await the callback to continue. - match callback_receiver.await { - Ok(Ok(())) => (), // continue - Ok(Err(e)) => { - error!("BFT failed to advance the subdag for round {anchor_round} - {e}"); - return Ok(()); - } - Err(e) => { - error!("BFT failed to receive the callback for round {anchor_round} - {e}"); - return Ok(()); - } + if let Err(err) = cb.process_bft_subdag(subdag, transmissions).await { + error!("BFT failed to advance the subdag for round {anchor_round}: {err:?}"); + return Ok(()); } } @@ -877,6 +874,8 @@ impl BFT { /// Shuts down the BFT. pub async fn shut_down(&self) { info!("Shutting down the BFT..."); + // Remove the callback. + self.bft_callback.clear(); // Acquire the lock. let _lock = self.lock.lock().await; // Shut down the primary. 
diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index f14ed56f64..1cc3f1cec0 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -18,40 +18,17 @@ use snarkvm::{ console::network::*, ledger::{ block::Transaction, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, + narwhal::{BatchCertificate, Data, TransmissionID}, puzzle::{Solution, SolutionID}, }, prelude::Result, }; -use indexmap::IndexMap; use std::net::SocketAddr; use tokio::sync::{mpsc, oneshot}; const MAX_CHANNEL_SIZE: usize = 8192; -#[derive(Debug)] -pub struct ConsensusSender { - pub tx_consensus_subdag: - mpsc::Sender<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -#[derive(Debug)] -pub struct ConsensusReceiver { - pub rx_consensus_subdag: - mpsc::Receiver<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -/// Initializes the consensus channels. -pub fn init_consensus_channels() -> (ConsensusSender, ConsensusReceiver) { - let (tx_consensus_subdag, rx_consensus_subdag) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = ConsensusSender { tx_consensus_subdag }; - let receiver = ConsensusReceiver { rx_consensus_subdag }; - - (sender, receiver) -} - #[derive(Clone, Debug)] pub struct PrimarySender { pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index a46598b0c0..d2b5c29e2d 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -32,7 +32,7 @@ pub use snarkos_node_bft_storage_service as storage_service; pub mod helpers; mod bft; -pub use bft::*; +pub use bft::{BFT, BftCallback}; mod gateway; pub use gateway::Gateway; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index c05f227f7e..4fb42c05e4 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -18,7 +18,7 @@ use crate::{ PRIMARY_PING_IN_MS, events::DataBlocks, gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, - 
helpers::{Pending, Storage, fmt_id, max_redundant_requests}, + helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, spawn_blocking, }; use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; @@ -30,15 +30,12 @@ use snarkvm::{ prelude::{cfg_into_iter, cfg_iter}, }; -use anyhow::{Context, Result, anyhow, bail, ensure}; +use anyhow::{Context, Result, anyhow, bail}; use indexmap::IndexMap; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::{Mutex, RwLock}, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] -use parking_lot::{Mutex, RwLock}; +use parking_lot::Mutex; #[cfg(not(feature = "serial"))] use rayon::prelude::*; use std::{ @@ -91,7 +88,7 @@ pub struct Sync { /// The pending certificates queue. pending: Arc, BatchCertificate>>, /// The sync callback (used by [`BFT`]). - sync_callback: Arc>>>>, + sync_callback: Arc>>>, /// Handles to the spawned background tasks. handles: Arc>>>, /// The response lock. @@ -137,8 +134,7 @@ impl Sync { pub async fn initialize(&self, sync_callback: Option>>) -> Result<()> { // If a callback was provided, set it. if let Some(callback) = sync_callback { - let prev = self.sync_callback.write().replace(callback); - ensure!(prev.is_none(), "Sync callback was already set"); + self.sync_callback.set(callback)?; } info!("Syncing storage with the ledger..."); @@ -150,11 +146,6 @@ impl Sync { Ok(()) } - /// Get the `SyncCallback` if one is set. - fn get_callback(&self) -> Option>> { - self.sync_callback.read().clone() - } - /// Sends the given batch of block requests to peers. /// /// Responses to block requests will eventually be processed by `Self::try_advancing_block_synchronization`. @@ -408,9 +399,8 @@ impl Sync { .flatten() .collect::>(); - // If a BFT sender was provided, send the certificates to the BFT. - if let Some(cb) = self.get_callback() { - // Await the callback to continue. 
+ // If a callback was provided, send the certificates to it. + if let Some(cb) = self.sync_callback.get() { cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; } @@ -643,9 +633,9 @@ impl Sync { // Sync the BFT DAG with the certificates. for certificate in certificates { - // If a BFT sender was provided, send the certificate to the BFT. + // If a callback was provided, send the certificate to ti. // For validators, BFT spawns a receiver task in `BFT::start_handlers`. - if let Some(cb) = self.get_callback() { + if let Some(cb) = self.sync_callback.get() { cb.add_new_certificate(certificate) .await .with_context(|| "Failed to sync certificate - {err}")?; @@ -884,7 +874,7 @@ impl Sync { pub async fn shut_down(&self) { info!("Shutting down the sync module..."); // Remove the callback. - let _ = self.sync_callback.write().take(); + self.sync_callback.clear(); // Acquire the response lock. let _lock = self.response_lock.lock().await; // Acquire the sync lock. 
diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index 33421460f1..520e7c9bd2 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml @@ -30,6 +30,8 @@ telemetry = [ "snarkos-node-bft/telemetry" ] cuda = [ "snarkvm/cuda", "snarkos-account/cuda", "snarkos-node-bft-ledger-service/cuda" ] serial = [ "snarkos-node-bft-ledger-service/serial" ] +[dependencies.async-trait] +workspace = true [dependencies.aleo-std] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 3e3c34c0a3..4a2bb3c17e 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -27,16 +27,10 @@ extern crate snarkos_node_metrics as metrics; use snarkos_account::Account; use snarkos_node_bft::{ BFT, + BftCallback, MAX_BATCH_DELAY_IN_MS, Primary, - helpers::{ - ConsensusReceiver, - PrimarySender, - Storage as NarwhalStorage, - fmt_id, - init_consensus_channels, - init_primary_channels, - }, + helpers::{PrimarySender, Storage as NarwhalStorage, fmt_id, init_primary_channels}, spawn_blocking, }; use snarkos_node_bft_ledger_service::LedgerService; @@ -152,12 +146,12 @@ impl Consensus { info!("Starting the consensus instance..."); - // First, initialize the consensus channels. - let (consensus_sender, consensus_receiver) = init_consensus_channels(); - // Then, start the consensus handlers. - _self.start_handlers(consensus_receiver); + _self.start_handlers(); // Lastly, also start BFTs handlers. - _self.bft.run(Some(ping), Some(consensus_sender), _self.primary_sender.clone(), primary_receiver).await?; + _self + .bft + .run(Some(ping), Some(Arc::new(_self.clone())), _self.primary_sender.clone(), primary_receiver) + .await?; Ok(_self) } @@ -456,17 +450,7 @@ impl Consensus { /// Starts the consensus handlers. /// /// This is only invoked once, in the constructor. 
- fn start_handlers(&self, consensus_receiver: ConsensusReceiver) { - let ConsensusReceiver { mut rx_consensus_subdag } = consensus_receiver; - - // Process the committed subdag and transmissions from the BFT. - let self_ = self.clone(); - self.spawn(async move { - while let Some((committed_subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - self_.process_bft_subdag(committed_subdag, transmissions, callback).await; - } - }); - + fn start_handlers(&self) { // Process the unconfirmed transactions in the memory pool. // // TODO (kaimast): This shouldn't happen periodically but only when new batches/blocks are accepted @@ -487,14 +471,16 @@ impl Consensus { } }); } +} +#[async_trait::async_trait] +impl BftCallback for Consensus { /// Attempts to build a new block from the given subDAG, and (tries to) advance the legder to it. async fn process_bft_subdag( &self, subdag: Subdag, transmissions: IndexMap, Transmission>, - callback: oneshot::Sender>, - ) { + ) -> Result<()> { // Try to advance to the next block. let self_ = self.clone(); let transmissions_ = transmissions.clone(); @@ -502,14 +488,14 @@ impl Consensus { // If the block failed to advance, reinsert the transmissions into the memory pool. if result.is_err() { - // On failure, reinsert the transmissions into the memory pool. self.reinsert_transmissions(transmissions).await; } - // Send the callback **after** advancing to the next block. - // Note: We must await the block to be advanced before sending the callback. - callback.send(result).ok(); + + result } +} +impl Consensus { /// Attempts to advance the ledger to the next block, and updates the metrics (if enabled) accordingly. 
fn try_advance_to_next_block( &self, From 8dad4716476005b6582fd0e6b165dfafd49dfb19 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Tue, 16 Sep 2025 14:36:54 -0700 Subject: [PATCH 04/16] misc(bft): replace PrimarySender with a callback --- Cargo.lock | 2 - node/bft/Cargo.toml | 13 +- node/bft/examples/simple_node.rs | 70 ++-- node/bft/ledger-service/src/traits.rs | 6 +- node/bft/src/bft.rs | 86 +++-- node/bft/src/gateway.rs | 117 +++++-- node/bft/src/helpers/channels.rs | 94 +---- node/bft/src/helpers/dag.rs | 4 +- node/bft/src/helpers/partition.rs | 4 +- node/bft/src/lib.rs | 4 +- node/bft/src/primary.rs | 478 ++++++++++++-------------- node/bft/src/sync/mod.rs | 2 +- node/bft/tests/bft_e2e.rs | 21 +- node/bft/tests/common/primary.rs | 35 +- node/bft/tests/common/utils.rs | 33 +- node/bft/tests/gateway_e2e.rs | 9 +- node/bft/tests/narwhal_e2e.rs | 12 +- node/consensus/Cargo.toml | 15 +- node/consensus/src/lib.rs | 35 +- 19 files changed, 461 insertions(+), 579 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b384f22c19..87cd5b67e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3984,8 +3984,6 @@ dependencies = [ "parking_lot", "snarkos-account", "snarkos-node-bft", - "snarkos-node-bft-ledger-service", - "snarkos-node-bft-storage-service", "snarkos-node-metrics", "snarkos-node-sync", "snarkvm", diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 61ab97f8b5..1d5015b33c 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -38,11 +38,15 @@ cuda = [ "snarkos-node-bft-ledger-service/cuda", "snarkos-node-sync/cuda" ] +persistent-storage = [ "snarkos-node-bft-storage-service/persistent" ] test = [ # "snarkvm/test" this breaks some of the tests - "snarkvm/test-helpers", "snarkos-node-bft-ledger-service/test", - "snarkos-node-bft-storage-service/test" + "snarkos-node-bft-storage-service/test", + "test-helpers" +] +test-helpers = [ + "snarkvm/test-helpers", ] serial = [ "snarkos-node-bft-ledger-service/serial" ] @@ -200,3 +204,8 @@ workspace = true 
[dev-dependencies.mockall] version = "0.12.1" + +[[test]] +name = "gateway-e2e" +path = "./tests/gateway_e2e.rs" +required-features = [ "test-helpers" ] diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 82b0cc4625..1f473693e1 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -21,13 +21,7 @@ extern crate snarkos_node_metrics as metrics; use aleo_std::StorageMode; use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - BftCallback, - MEMORY_POOL_PORT, - Primary, - helpers::{PrimarySender, Storage, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, BftCallback, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; @@ -67,7 +61,7 @@ use std::{ str::FromStr, sync::{Arc, Mutex, OnceLock, atomic::AtomicBool}, }; -use tokio::{net::TcpListener, sync::oneshot}; +use tokio::net::TcpListener; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -117,13 +111,7 @@ pub fn initialize_logger(verbosity: u8) { /**************************************************************************************************/ /// Starts the BFT instance. -pub async fn start_bft( - node_id: u16, - num_nodes: u16, - peers: HashMap, -) -> Result<(BFT, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +pub async fn start_bft(node_id: u16, num_nodes: u16, peers: HashMap) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -146,16 +134,17 @@ pub async fn start_bft( let consensus_handler = Arc::new(ConsensusHandler {}); // Initialize the BFT instance. 
let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut bft = - BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?; + let bft = + BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None) + .await?; // Run the BFT instance. - bft.run(None, Some(consensus_handler), sender.clone(), receiver).await?; + bft.run(None, Some(consensus_handler)).await?; // Retrieve the BFT's primary. let primary = bft.primary(); // Handle OS signals. handle_signals(primary); // Return the BFT instance. - Ok((bft, sender)) + Ok(bft) } /// Starts the primary instance. @@ -163,9 +152,7 @@ pub async fn start_primary( node_id: u16, num_nodes: u16, peers: HashMap, -) -> Result<(Primary, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -186,7 +173,7 @@ pub async fn start_primary( let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the primary instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut primary = Primary::::new( + let primary = Primary::::new( account, storage, ledger, @@ -195,13 +182,14 @@ pub async fn start_primary( &trusted_validators, storage_mode, None, - )?; + ) + .await?; // Run the primary instance. - primary.run(None, None, None, sender.clone(), receiver).await?; + primary.run(None, None, None).await?; // Handle OS signals. handle_signals(&primary); // Return the primary instance. - Ok((primary, sender)) + Ok(primary) } /// Initialize the translucent ledger service. @@ -369,8 +357,7 @@ fn handle_signals(primary: &Primary) { /**************************************************************************************************/ /// Fires *fake* unconfirmed solutions at the node. 
-fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -394,13 +381,8 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -410,8 +392,7 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u } /// Fires *fake* unconfirmed transactions at the node. -fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); +fn fire_unconfirmed_transactions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -436,13 +417,8 @@ fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. 
- let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. - if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -581,14 +557,14 @@ async fn main() -> Result<()> { let mut bft_holder = None; // Start the node. - let (primary, sender) = match args.mode { + let primary = match args.mode { Mode::Bft => { // Start the BFT. - let (bft, sender) = start_bft(args.id, args.num_nodes, peers).await?; + let bft = start_bft(args.id, args.num_nodes, peers).await?; // Set the BFT holder. bft_holder = Some(bft.clone()); // Return the primary and sender. - (bft.primary().clone(), sender) + bft.primary().clone() } Mode::Narwhal => start_primary(args.id, args.num_nodes, peers).await?, }; @@ -600,7 +576,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_solutions) { // Note: We allow the user to overload the solutions rate, even when the 'fire-transmissions' flag is enabled. (Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_solutions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_solutions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; @@ -609,7 +585,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_transactions) { // Note: We allow the user to overload the transactions rate, even when the 'fire-transmissions' flag is enabled. 
(Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_transactions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_transactions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; diff --git a/node/bft/ledger-service/src/traits.rs b/node/bft/ledger-service/src/traits.rs index 02b1f2d7e5..8c09857739 100644 --- a/node/bft/ledger-service/src/traits.rs +++ b/node/bft/ledger-service/src/traits.rs @@ -17,13 +17,17 @@ use snarkvm::{ ledger::{ block::{Block, Transaction}, committee::Committee, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, + narwhal::{BatchCertificate, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, prelude::{Address, ConsensusVersion, Field, Network, Result}, }; +#[cfg(feature = "ledger-write")] use indexmap::IndexMap; +#[cfg(feature = "ledger-write")] +use snarkvm::ledger::narwhal::Subdag; + use std::{fmt::Debug, ops::Range}; #[async_trait] diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 6f8d80d620..49a4138899 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,7 +15,7 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - helpers::{CallbackHandle, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + helpers::{CallbackHandle, DAG, Storage, fmt_id, now}, primary::{Primary, PrimaryCallback}, sync::SyncCallback, }; @@ -80,7 +80,7 @@ pub struct BFT { impl BFT { /// Initializes a new instance of the BFT. 
#[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -91,7 +91,8 @@ impl BFT { dev: Option, ) -> Result { Ok(Self { - primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev)?, + primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev) + .await?, dag: Default::default(), leader_certificate: Default::default(), leader_certificate_timer: Default::default(), @@ -104,20 +105,14 @@ impl BFT { /// /// This will return as soon as all required tasks are spawned. /// The function must not be called more than once per instance. - pub async fn run( - &mut self, - ping: Option>>, - bft_callback: Option>>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { + pub async fn run(&self, ping: Option>>, bft_callback: Option>>) -> Result<()> { info!("Starting the BFT instance..."); // Set up callbacks to pass to the primary. let primary_callback = Some(Arc::new(self.clone()) as Arc>); let sync_callback = Some(Arc::new(self.clone()) as Arc>); // Next, run the primary instance. - self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; + self.primary.run(ping, primary_callback, sync_callback).await?; // Lastly, set up callbacks for BFT itself. // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. @@ -930,7 +925,7 @@ mod tests { } // Helper function to set up BFT for testing. - fn initialize_bft( + async fn initialize_bft( account: Account, storage: Storage, ledger: Arc>, @@ -948,11 +943,12 @@ mod tests { StorageMode::new_test(None), None, ) + .await } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_odd() -> Result<()> { + async fn test_is_leader_quorum_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. 
@@ -981,7 +977,7 @@ mod tests { // Initialize the account. let account = Account::new(rng)?; // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call succeeds on an odd round. let result = bft.is_leader_quorum_or_nonleaders_available(1); @@ -1004,9 +1000,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { + async fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1016,7 +1012,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Store is at round 1, and we are checking for round 2. @@ -1026,9 +1022,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even() -> Result<()> { + async fn test_is_leader_quorum_even() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1038,7 +1034,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an even round. @@ -1047,9 +1043,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_even_round_ready() -> Result<()> { + async fn test_is_even_round_ready() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. 
@@ -1079,7 +1075,7 @@ mod tests { let account = Account::new(rng)?; // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Set the leader certificate. @@ -1093,7 +1089,7 @@ mod tests { assert!(result); // Initialize a new BFT. - let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // If the leader certificate is not set and the timer has not expired, we are not ready for the next round. let result = bft_timer.is_even_round_ready_for_next_round(certificates.clone(), committee.clone(), 2); if !bft_timer.is_timer_expired() { @@ -1114,9 +1110,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_odd() -> Result<()> { + async fn test_update_leader_certificate_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1124,7 +1120,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an odd round. @@ -1133,9 +1129,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_bad_round() -> Result<()> { + async fn test_update_leader_certificate_bad_round() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1143,7 +1139,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. 
- let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Ensure this call succeeds on an even round. let result = bft.update_leader_certificate_to_even_round(6); @@ -1151,9 +1147,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_even() -> Result<()> { + async fn test_update_leader_certificate_even() -> Result<()> { let rng = &mut TestRng::default(); // Set the current round. @@ -1195,7 +1191,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Set the leader certificate. *bft.leader_certificate.write() = Some(leader_certificate); @@ -1233,7 +1229,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(3); @@ -1263,7 +1259,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. 
*bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(2); @@ -1295,9 +1291,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { + async fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1321,7 +1317,7 @@ mod tests { /* Test missing previous certificate. */ // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // The expected error message. let error_msg = format!( @@ -1382,7 +1378,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1448,7 +1444,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1466,7 +1462,7 @@ mod tests { // Initialize a new instance of storage. let storage_2 = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT. - let bootup_bft = initialize_bft(account.clone(), storage_2, ledger)?; + let bootup_bft = initialize_bft(account.clone(), storage_2, ledger).await?; // Sync the BFT DAG at bootup. 
bootup_bft.sync_dag_at_bootup(certificates.clone()).await.unwrap(); @@ -1620,7 +1616,7 @@ mod tests { // Initialize the BFT without bootup. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); @@ -1645,7 +1641,7 @@ mod tests { let bootup_storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT with bootup. - let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone()).await?; // Sync the BFT DAG at bootup. bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); @@ -1823,7 +1819,7 @@ mod tests { } // Initialize the bootup BFT. let account = Account::new(rng)?; - let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. 
*bootup_bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 092cd2312a..ca8e18db45 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -20,8 +20,8 @@ use crate::{ MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Worker, - events::{EventCodec, PrimaryPing}, - helpers::{Cache, CallbackHandle, PrimarySender, Resolver, Storage, WorkerSender, assign_to_worker}, + events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, + helpers::{Cache, CallbackHandle, Resolver, Storage, WorkerSender, assign_to_worker}, spawn_blocking, }; use snarkos_account::Account; @@ -58,7 +58,7 @@ use snarkvm::{ ledger::{ Block, committee::Committee, - narwhal::{BatchHeader, Data}, + narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, }; @@ -118,7 +118,8 @@ pub trait Transport: Send + Sync { fn broadcast(&self, event: Event); } -pub trait SyncCallback: Send + Sync { +/// Callback for events specific to BlockSync. +pub trait GatewaySyncCallback: Send + Sync { /// We received a block response and can (possibly) advance synchronization. fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()>; @@ -134,6 +135,18 @@ pub trait SyncCallback: Send + Sync { fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse); } +/// Callback for primary-specific events +#[async_trait::async_trait] +pub trait GatewayPrimaryCallback: Send + Sync { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>); + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose); + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature); + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>); +} + /// The gateway maintains connections to other validators. 
/// For connections with clients and provers, the Router logic is used. #[derive(Clone)] @@ -162,12 +175,12 @@ pub struct Gateway { /// The validator telemetry. #[cfg(feature = "telemetry")] validator_telemetry: Telemetry, - /// The primary sender. - primary_sender: Arc>>, /// The worker senders. worker_senders: Arc>>>, /// The callback for sync messages. - sync_callback: Arc>>>, + sync_callback: Arc>>>, + /// The callback for bft/primary messages. + primary_callback: Arc>>>, /// The spawned handles. handles: Arc>>>, /// The development mode. @@ -206,7 +219,7 @@ impl Gateway { connecting_peers: Default::default(), #[cfg(feature = "telemetry")] validator_telemetry: Default::default(), - primary_sender: Default::default(), + primary_callback: Default::default(), worker_senders: Default::default(), sync_callback: Default::default(), handles: Default::default(), @@ -217,18 +230,16 @@ impl Gateway { /// Run the gateway. pub async fn run( &self, - primary_sender: PrimarySender, worker_senders: IndexMap>, - sync_callback: Option>>, + primary_callback: Arc>, + sync_callback: Option>>, ) { debug!("Starting the gateway for the memory pool..."); - // Set the primary sender. - self.primary_sender.set(primary_sender).expect("Primary sender already set in gateway"); - - // Set the worker senders. self.worker_senders.set(worker_senders).expect("The worker senders are already set"); + self.primary_callback.set(primary_callback).expect("The primary callback is already set"); + if let Some(sync_callback) = sync_callback { self.sync_callback.set(sync_callback).unwrap(); } @@ -352,11 +363,6 @@ impl Gateway { &self.validator_telemetry } - /// Returns the primary sender. - pub fn primary_sender(&self) -> &PrimarySender { - self.primary_sender.get().expect("Primary sender not set in gateway") - } - /// Returns the number of workers. 
pub fn num_workers(&self) -> u8 { u8::try_from(self.worker_senders.get().expect("Missing worker senders in gateway").len()) @@ -654,17 +660,32 @@ impl Gateway { match event { Event::BatchPropose(batch_propose) => { // Send the batch propose to the primary. - let _ = self.primary_sender().tx_batch_propose.send((peer_ip, batch_propose)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_propose(peer_ip, batch_propose) + .await; Ok(()) } Event::BatchSignature(batch_signature) => { // Send the batch signature to the primary. - let _ = self.primary_sender().tx_batch_signature.send((peer_ip, batch_signature)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_signature(peer_ip, batch_signature) + .await; Ok(()) } Event::BatchCertified(batch_certified) => { // Send the batch certificate to the primary. - let _ = self.primary_sender().tx_batch_certified.send((peer_ip, batch_certified.certificate)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_certified(peer_ip, batch_certified.certificate) + .await; Ok(()) } Event::BlockRequest(block_request) => { @@ -778,7 +799,11 @@ impl Gateway { } // Send the batch certificates to the primary. - let _ = self.primary_sender().tx_primary_ping.send((peer_ip, primary_certificate)).await; + self.primary_callback + .get() + .expect("No callback set") + .process_incoming_ping(peer_ip, primary_certificate) + .await; Ok(()) } Event::TransmissionRequest(request) => { @@ -958,8 +983,9 @@ impl Gateway { self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Close the listener. self.tcp.shut_down().await; - // Remove the sync callback (so it can be dropped). + // Remove the sync and primary callback (so they can be dropped). 
self.sync_callback.clear(); + self.primary_callback.clear(); } } @@ -1568,18 +1594,55 @@ impl Gateway { } } +#[cfg(any(test, feature = "test"))] +pub mod test_helpers { + use super::*; + + type CurrentNetwork = MainnetV0; + + #[derive(Default)] + pub struct DummyGatewayPrimaryCallback {} + + #[async_trait::async_trait] + impl GatewayPrimaryCallback for DummyGatewayPrimaryCallback { + async fn process_incoming_ping( + &self, + _peer_ip: SocketAddr, + _primary_certificate: Data>, + ) { + } + + async fn process_batch_propose(&self, _peer_ip: SocketAddr, _batch_propose: BatchPropose) {} + + async fn process_batch_signature( + &self, + _peer_ip: SocketAddr, + _batch_signature: BatchSignature, + ) { + } + + async fn process_batch_certified( + &self, + _peer_ip: SocketAddr, + _batch_certificate: Data>, + ) { + } + } +} + #[cfg(test)] mod prop_tests { use super::{ Gateway, prop_tests::GatewayAddress::{Dev, Prod}, + test_helpers::DummyGatewayPrimaryCallback, }; use crate::{ MAX_WORKERS, MEMORY_POOL_PORT, Worker, - helpers::{Storage, init_primary_channels, init_worker_channels}, + helpers::{Storage, init_worker_channels}, }; use snarkos_account::Account; use snarkos_node_bft_ledger_service::MockLedgerService; @@ -1746,8 +1809,6 @@ mod prop_tests { let gateway = Gateway::new(account, storage.clone(), storage.ledger().clone(), dev.ip(), &[], dev.port()).unwrap(); - let (primary_sender, _) = init_primary_channels(); - let (workers, worker_senders) = { // Construct a map of the worker senders. 
let mut tx_workers = IndexMap::new(); @@ -1772,7 +1833,7 @@ mod prop_tests { (workers, tx_workers) }; - gateway.run(primary_sender, worker_senders, None).await; + gateway.run(worker_senders, Arc::new(DummyGatewayPrimaryCallback::default()), None).await; assert_eq!( gateway.local_ip(), SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1cc3f1cec0..1f23e38264 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -13,102 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::events::{BatchPropose, BatchSignature, TransmissionRequest, TransmissionResponse}; -use snarkvm::{ - console::network::*, - ledger::{ - block::Transaction, - narwhal::{BatchCertificate, Data, TransmissionID}, - puzzle::{Solution, SolutionID}, - }, - prelude::Result, -}; +use crate::events::{TransmissionRequest, TransmissionResponse}; +use snarkvm::{console::network::*, ledger::narwhal::TransmissionID}; use std::net::SocketAddr; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::mpsc; const MAX_CHANNEL_SIZE: usize = 8192; -#[derive(Clone, Debug)] -pub struct PrimarySender { - pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, - pub tx_batch_signature: mpsc::Sender<(SocketAddr, BatchSignature)>, - pub tx_batch_certified: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_primary_ping: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_unconfirmed_solution: mpsc::Sender<(SolutionID, Data>, oneshot::Sender>)>, - pub tx_unconfirmed_transaction: mpsc::Sender<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -impl PrimarySender { - /// Sends the unconfirmed solution to the primary. - pub async fn send_unconfirmed_solution( - &self, - solution_id: SolutionID, - solution: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. 
- let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed solution to the primary. - self.tx_unconfirmed_solution.send((solution_id, solution, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the unconfirmed transaction to the primary. - pub async fn send_unconfirmed_transaction( - &self, - transaction_id: N::TransactionID, - transaction: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed transaction to the primary. - self.tx_unconfirmed_transaction.send((transaction_id, transaction, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -#[derive(Debug)] -pub struct PrimaryReceiver { - pub rx_batch_propose: mpsc::Receiver<(SocketAddr, BatchPropose)>, - pub rx_batch_signature: mpsc::Receiver<(SocketAddr, BatchSignature)>, - pub rx_batch_certified: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_primary_ping: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_unconfirmed_solution: mpsc::Receiver<(SolutionID, Data>, oneshot::Sender>)>, - pub rx_unconfirmed_transaction: - mpsc::Receiver<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -/// Initializes the primary channels. 
-pub fn init_primary_channels() -> (PrimarySender, PrimaryReceiver) { - let (tx_batch_propose, rx_batch_propose) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_signature, rx_batch_signature) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_certified, rx_batch_certified) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_ping, rx_primary_ping) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_solution, rx_unconfirmed_solution) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_transaction, rx_unconfirmed_transaction) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = PrimarySender { - tx_batch_propose, - tx_batch_signature, - tx_batch_certified, - tx_primary_ping, - tx_unconfirmed_solution, - tx_unconfirmed_transaction, - }; - let receiver = PrimaryReceiver { - rx_batch_propose, - rx_batch_signature, - rx_batch_certified, - rx_primary_ping, - rx_unconfirmed_solution, - rx_unconfirmed_transaction, - }; - - (sender, receiver) -} - #[derive(Debug)] pub struct WorkerSender { pub tx_worker_ping: mpsc::Sender<(SocketAddr, TransmissionID)>, diff --git a/node/bft/src/helpers/dag.rs b/node/bft/src/helpers/dag.rs index 53f149734f..babb62d16b 100644 --- a/node/bft/src/helpers/dag.rs +++ b/node/bft/src/helpers/dag.rs @@ -128,7 +128,9 @@ impl DAG { // Update the recently committed IDs. let is_new = self.recent_committed_ids.entry(certificate_round).or_default().insert(certificate_id); - if !is_new { + if is_new { + trace!("Got new commit for certificate {certificate_id} at round {certificate_round}"); + } else { //TODO (kaimast): return early here? 
trace!("Certificate {certificate_id} was already committed for round {certificate_round}"); } diff --git a/node/bft/src/helpers/partition.rs b/node/bft/src/helpers/partition.rs index 809c9fb814..f171a2a27d 100644 --- a/node/bft/src/helpers/partition.rs +++ b/node/bft/src/helpers/partition.rs @@ -19,7 +19,7 @@ use snarkvm::{ prelude::{Network, ToBytes}, }; -use anyhow::{Result, bail}; +use anyhow::{Result, bail, ensure}; use sha2::{Digest, Sha256}; fn double_sha256(data: &[u8]) -> [u8; 32] { @@ -38,6 +38,8 @@ pub fn sha256d_to_u128(data: &[u8]) -> u128 { /// Returns the worker ID for the given transmission ID. pub fn assign_to_worker(transmission_id: impl Into>, num_workers: u8) -> Result { + ensure!(num_workers > 0, "Need at least one worker"); + // If there is only one worker, return it. if num_workers == 1 { return Ok(0); diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index d2b5c29e2d..33dbb08775 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -34,8 +34,8 @@ pub mod helpers; mod bft; pub use bft::{BFT, BftCallback}; -mod gateway; -pub use gateway::Gateway; +pub mod gateway; +pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; mod primary; pub use primary::*; diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index d190b1d0de..02af82f109 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -21,11 +21,9 @@ use crate::{ WORKER_PING_IN_MS, Worker, events::{BatchPropose, BatchSignature, Event}, - gateway::{Gateway, Transport}, + gateway::{Gateway, GatewayPrimaryCallback, Transport}, helpers::{ CallbackHandle, - PrimaryReceiver, - PrimarySender, Proposal, ProposalCache, SignedProposals, @@ -131,7 +129,7 @@ impl Primary { /// Initializes a new primary instance. 
#[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -145,22 +143,77 @@ impl Primary { let gateway = Gateway::new(account, storage.clone(), ledger.clone(), ip, trusted_validators, dev)?; // Initialize the sync module. let sync = Sync::new(gateway.clone(), storage.clone(), ledger.clone(), block_sync); + let proposed_batch = Arc::new(ProposedBatch::default()); + + // Construct a map of the worker senders. + let mut worker_senders = IndexMap::new(); + + // Initialize the workers. + let mut workers = Vec::new(); + for id in 0..MAX_WORKERS { + // Construct the worker channels. + let (tx_worker, rx_worker) = init_worker_channels(); + // Construct the worker instance. + let worker = + Worker::new(id, Arc::new(gateway.clone()), storage.clone(), ledger.clone(), proposed_batch.clone()) + .with_context(|| "Failed to initialize worker")?; + // Run the worker instance. + worker.run(rx_worker); + // Add the worker to the list of workers. + workers.push(worker); + // Add the worker sender to the map. + worker_senders.insert(id, tx_worker); + } // Initialize the primary instance. - Ok(Self { + let obj = Self { sync, - gateway, + gateway: gateway.clone(), storage, ledger, - workers: Arc::from(vec![]), + workers: Arc::from(workers), primary_callback: Default::default(), - proposed_batch: Default::default(), + proposed_batch, latest_proposed_batch_timestamp: Default::default(), signed_proposals: Default::default(), handles: Default::default(), propose_lock: Default::default(), storage_mode, - }) + }; + + // Next, initialize the gateway. + let gateway_primary_callback = Arc::new(obj.clone()) as Arc>; + let gateway_sync_callback = Arc::new(obj.sync.clone()); + obj.gateway.run(worker_senders, gateway_primary_callback, Some(gateway_sync_callback)).await; + + Ok(obj) + } + + /// Starts all remaining (background) tasks needed for the primary instance. 
+ pub async fn run( + &self, + ping: Option>>, + primary_callback: Option>>, + sync_callback: Option>>, + ) -> Result<()> { + info!("Starting the primary instance of the memory pool..."); + + // Set the BFT sender. + if let Some(callback) = primary_callback { + self.primary_callback.set(callback)?; + } + + // Next, initialize the sync module and sync the storage from ledger. + self.sync.initialize(sync_callback).await?; + // Next, load and process the proposal cache before running the sync module. + self.load_proposal_cache().await?; + // Next, run the sync module. + self.sync.run(ping).await?; + // Lastly, start the primary handlers. + // Note: This ensures the primary does not start communicating before syncing is complete. + self.start_handlers(); + + Ok(()) } /// Load the proposal cache file and update the Primary state with the stored data. @@ -203,63 +256,6 @@ impl Primary { } } - /// Run the primary instance. - pub async fn run( - &mut self, - ping: Option>>, - primary_callback: Option>>, - sync_callback: Option>>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { - info!("Starting the primary instance of the memory pool..."); - - // Set the BFT sender. - if let Some(callback) = primary_callback { - self.primary_callback.set(callback)?; - } - - // Construct a map of the worker senders. - let mut worker_senders = IndexMap::new(); - // Construct a map for the workers. - let mut workers = Vec::new(); - // Initialize the workers. - for id in 0..MAX_WORKERS { - // Construct the worker channels. - let (tx_worker, rx_worker) = init_worker_channels(); - // Construct the worker instance. - let worker = Worker::new( - id, - Arc::new(self.gateway.clone()), - self.storage.clone(), - self.ledger.clone(), - self.proposed_batch.clone(), - )?; - // Run the worker instance. - worker.run(rx_worker); - // Add the worker to the list of workers. - workers.push(worker); - // Add the worker sender to the map. 
- worker_senders.insert(id, tx_worker); - } - // Set the workers. - self.workers = Arc::from(workers); - - // Next, initialize the sync module and sync the storage from ledger. - self.sync.initialize(sync_callback).await?; - // Next, load and process the proposal cache before running the sync module. - self.load_proposal_cache().await?; - // Next, run the sync module. - self.sync.run(ping).await?; - // Next, initialize the gateway. - self.gateway.run(primary_sender, worker_senders, Some(Arc::new(self.sync.clone()))).await; - // Lastly, start the primary handlers. - // Note: This ensures the primary does not start communicating before syncing is complete. - self.start_handlers(primary_receiver); - - Ok(()) - } - /// Returns the current round. pub fn current_round(&self) -> u64 { self.storage.current_round() @@ -1193,16 +1189,7 @@ impl Primary { /// tries to move the the next round of batches. /// /// This function is called exactly once, in `Self::run()`. - fn start_handlers(&self, primary_receiver: PrimaryReceiver) { - let PrimaryReceiver { - mut rx_batch_propose, - mut rx_batch_signature, - mut rx_batch_certified, - mut rx_primary_ping, - mut rx_unconfirmed_solution, - mut rx_unconfirmed_transaction, - } = primary_receiver; - + fn start_handlers(&self) { // Start the primary ping sender. let self_ = self.clone(); self.spawn(async move { @@ -1259,39 +1246,6 @@ impl Primary { } }); - // Start the primary ping handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, primary_certificate)) = rx_primary_ping.recv().await { - // If the primary is not synced, then do not process the primary ping. - if self_.sync.is_synced() { - trace!("Processing new primary ping from '{peer_ip}'"); - } else { - trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - - // Spawn a task to process the primary certificate. 
- { - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) - else { - warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); - return; - }; - // Process the primary certificate. - let id = fmt_id(primary_certificate.id()); - let round = primary_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { - warn!("Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}"); - } - }); - } - } - }); - // Start the worker ping(s). let self_ = self.clone(); self.spawn(async move { @@ -1339,75 +1293,6 @@ impl Primary { } }); - // Start the proposed batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_propose)) = rx_batch_propose.recv().await { - // If the primary is not synced, then do not sign the batch. - if !self_.sync.is_synced() { - trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the proposed batch. - let self_ = self_.clone(); - tokio::spawn(async move { - // Process the batch proposal. - let round = batch_propose.round; - if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { - warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - - // Start the batch signature handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_signature)) = rx_batch_signature.recv().await { - // If the primary is not synced, then do not store the signature. - if !self_.sync.is_synced() { - trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Process the batch signature. 
- // Note: Do NOT spawn a task around this function call. Processing signatures from peers - // is a critical path, and we should only store the minimum required number of signatures. - // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), - // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. - let id = fmt_id(batch_signature.batch_id); - if let Err(e) = self_.process_batch_signature_from_peer(peer_ip, batch_signature).await { - warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); - } - } - }); - - // Start the certified batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_certificate)) = rx_batch_certified.recv().await { - // If the primary is not synced, then do not store the certificate. - if !self_.sync.is_synced() { - trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the batch certificate. - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the batch certificate. - let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { - warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); - return; - }; - // Process the batch certificate. - let id = fmt_id(batch_certificate.id()); - let round = batch_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { - warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - // This task periodically tries to move to the next round. // // Note: This is necessary to ensure that the primary is not stuck on a previous round @@ -1450,59 +1335,6 @@ impl Primary { } } }); - - // Start a handler to process new unconfirmed solutions. 
- let self_ = self.clone(); - self.spawn(async move { - while let Some((solution_id, solution, callback)) = rx_unconfirmed_solution.recv().await { - // Compute the checksum for the solution. - let Ok(checksum) = solution.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed solution"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker((solution_id, checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed solution"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed solution. - let result = worker.process_unconfirmed_solution(solution_id, solution).await; - // Send the result to the callback. - callback.send(result).ok(); - }); - } - }); - - // Start a handler to process new unconfirmed transactions. - let self_ = self.clone(); - self.spawn(async move { - while let Some((transaction_id, transaction, callback)) = rx_unconfirmed_transaction.recv().await { - trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); - // Compute the checksum for the transaction. - let Ok(checksum) = transaction.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed transaction"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed transaction"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed transaction. - let result = worker.process_unconfirmed_transaction(transaction_id, transaction).await; - // Send the result to the callback. 
- callback.send(result).ok(); - }); - } - }); } /// Checks if the proposed batch is expired, and clears the proposed batch if it has expired. @@ -1951,6 +1783,138 @@ impl Primary { } } +/// Handle events from the Gateway +#[async_trait::async_trait] +impl GatewayPrimaryCallback for Primary { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>) { + // If the primary is not synced, then do not process the primary ping. + if self.sync.is_synced() { + trace!("Processing new primary ping from '{peer_ip}'"); + } else { + trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + + // Spawn a task to process the primary certificate. + { + let self_ = self.clone(); + tokio::spawn(async move { + // Deserialize the primary certificate in the primary ping. + let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) else { + warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); + return; + }; + // Process the primary certificate. + let id = fmt_id(primary_certificate.id()); + let round = primary_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { + warn!( + "Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}" + ); + } + }); + } + } + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose) { + // If the primary is not synced, then do not sign the batch. + if !self.sync.is_synced() { + trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the proposed batch. + let self_ = self.clone(); + tokio::spawn(async move { + // Process the batch proposal. 
+ let round = batch_propose.round; + if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { + warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); + } + }); + } + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature) { + // If the primary is not synced, then do not store the signature. + if !self.sync.is_synced() { + trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Process the batch signature. + // Note: Do NOT spawn a task around this function call. Processing signatures from peers + // is a critical path, and we should only store the minimum required number of signatures. + // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), + // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. + let id = fmt_id(batch_signature.batch_id); + if let Err(e) = self.process_batch_signature_from_peer(peer_ip, batch_signature).await { + warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); + } + } + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>) { + // If the primary is not synced, then do not store the certificate. + if !self.sync.is_synced() { + trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the batch certificate. + let self_ = self.clone(); + tokio::spawn(async move { + // Deserialize the batch certificate. + let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { + warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); + return; + }; + // Process the batch certificate. 
+ let id = fmt_id(batch_certificate.id()); + let round = batch_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { + warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); + } + }); + } +} + +/// Invoked by the mempool ("Consensus"). +impl Primary { + pub async fn process_unconfirmed_solution( + &self, + solution_id: SolutionID, + solution: Data>, + ) -> Result<()> { + // Compute the checksum for the solution. + let Ok(checksum) = solution.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed solution"); + }; + + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker((solution_id, checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed solution"); + }; + + // Wait for the worker to process the unconfirmed solution. + self.workers[worker_id as usize].process_unconfirmed_solution(solution_id, solution).await + } + + pub async fn process_unconfirmed_transaction( + &self, + transaction_id: N::TransactionID, + transaction: Data>, + ) -> Result<()> { + trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); + // Compute the checksum for the transaction. + let Ok(checksum) = transaction.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed transaction"); + }; + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed transaction"); + }; + + // Wait for the worker to process the unconfirmed transaction. + self.workers[worker_id as usize].process_unconfirmed_transaction(transaction_id, transaction).await + } +} + #[cfg(test)] mod tests { use super::*; @@ -1989,7 +1953,7 @@ mod tests { } // Returns a primary and a list of accounts in the configured committee. 
- fn primary_with_committee( + async fn primary_with_committee( account_index: usize, accounts: &[(SocketAddr, Account)], committee: Committee, @@ -2002,7 +1966,7 @@ mod tests { let account = accounts[account_index].1.clone(); let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut primary = - Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).unwrap(); + Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).await.unwrap(); // Construct a worker instance. primary.workers = Arc::from([Worker::new( @@ -2020,7 +1984,7 @@ mod tests { primary } - fn primary_without_handlers( + async fn primary_without_handlers( rng: &mut TestRng, ) -> (Primary, Vec<(SocketAddr, Account)>) { let (accounts, committee) = sample_committee(rng); @@ -2029,7 +1993,8 @@ mod tests { &accounts, committee, CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V1).unwrap(), - ); + ) + .await; (primary, accounts) } @@ -2230,7 +2195,7 @@ mod tests { #[tokio::test] async fn test_propose_batch() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2251,7 +2216,7 @@ mod tests { #[tokio::test] async fn test_propose_batch_with_no_transmissions() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2265,7 +2230,7 @@ mod tests { async fn test_propose_batch_in_round() { let round = 3; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Fill primary storage. 
store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2291,7 +2256,7 @@ mod tests { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; @@ -2369,7 +2334,8 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2397,7 +2363,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2436,7 +2402,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer_when_not_synced() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2474,7 +2440,7 @@ mod tests { async fn test_batch_propose_from_peer_in_round() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. 
let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2513,7 +2479,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer_wrong_round() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2556,7 +2522,7 @@ mod tests { async fn test_batch_propose_from_peer_in_round_wrong_round() { let round = 4; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2602,7 +2568,7 @@ mod tests { async fn test_batch_propose_from_peer_with_past_timestamp() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2659,13 +2625,15 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; let primary_v5 = primary_with_committee( 1, &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V5).unwrap(), - ); + ) + .await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2715,7 +2683,7 @@ mod tests { async fn test_propose_batch_with_storage_round_behind_proposal_lock() { let round = 3; let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. 
assert!(primary.proposed_batch.read().is_none()); @@ -2748,7 +2716,7 @@ mod tests { async fn test_propose_batch_with_storage_round_behind_proposal() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate previous certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2777,7 +2745,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2814,7 +2782,7 @@ mod tests { async fn test_batch_signature_from_peer_in_round() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. @@ -2852,7 +2820,7 @@ mod tests { #[tokio::test] async fn test_batch_signature_from_peer_no_quorum() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2888,7 +2856,7 @@ mod tests { async fn test_batch_signature_from_peer_in_round_no_quorum() { let round = 7; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. 
@@ -2927,7 +2895,7 @@ mod tests { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 4fb42c05e4..9ceb8a4516 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -17,7 +17,7 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, events::DataBlocks, - gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, + gateway::{Gateway, GatewaySyncCallback, Transport}, helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, spawn_blocking, }; diff --git a/node/bft/tests/bft_e2e.rs b/node/bft/tests/bft_e2e.rs index 5f2b0baeeb..ceb515065f 100644 --- a/node/bft/tests/bft_e2e.rs +++ b/node/bft/tests/bft_e2e.rs @@ -39,7 +39,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -60,7 +61,8 @@ async fn test_resync() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: false, - }); + }) + .await; network.start().await; // Let the nodes advance through the rounds. @@ -77,7 +79,8 @@ async fn test_resync() { fire_transmissions: None, log_level: None, log_connections: false, - }); + }) + .await; spare_network.start().await; for i in 1..N { @@ -106,7 +109,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -158,7 +162,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. 
log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -194,7 +199,8 @@ async fn test_leader_election_consistency() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Wait for starting round to be reached @@ -246,7 +252,8 @@ async fn test_transient_break() { // Set this to Some(0..=4) to see the logs. log_level: Some(6), log_connections: false, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index ac847689f5..082f7db92e 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -19,13 +19,7 @@ use crate::common::{ utils::{fire_unconfirmed_solutions, fire_unconfirmed_transactions, initialize_logger}, }; use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - MAX_BATCH_DELAY_IN_MS, - MEMORY_POOL_PORT, - Primary, - helpers::{PrimarySender, Storage, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; use snarkvm::{ @@ -96,8 +90,6 @@ pub struct TestValidator { pub id: u16, /// The primary instance. When the BFT is enabled this is a clone of the BFT primary. pub primary: Primary, - /// The channel sender of the primary. - pub primary_sender: Option>, /// The BFT instance. This is only set if the BFT is enabled. pub bft: OnceLock>, /// The tokio handles of all long-running tasks associated with the validator (incl. cannons). 
@@ -108,9 +100,8 @@ pub type CurrentLedger = Ledger> impl TestValidator { pub fn fire_transmissions(&mut self, interval_ms: u64) { - let solution_handle = fire_unconfirmed_solutions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); - let transaction_handle = - fire_unconfirmed_transactions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); + let solution_handle = fire_unconfirmed_solutions(self.primary.clone(), self.id, interval_ms); + let transaction_handle = fire_unconfirmed_transactions(self.primary.clone(), self.id, interval_ms); self.handles.lock().push(solution_handle); self.handles.lock().push(transaction_handle); @@ -133,7 +124,7 @@ impl TestValidator { impl TestNetwork { // Creates a new test network with the given configuration. - pub fn new(config: TestNetworkConfig) -> Self { + pub async fn new(config: TestNetworkConfig) -> Self { let mut rng = TestRng::default(); if let Some(log_level) = config.log_level { @@ -178,6 +169,7 @@ impl TestNetwork { StorageMode::new_test(None), None, ) + .await .unwrap(); (bft.primary().clone(), Some(bft)) } else { @@ -191,17 +183,13 @@ impl TestNetwork { StorageMode::new_test(None), None, ) + .await .unwrap(); (primary, None) }; - let test_validator = TestValidator { - id: id as u16, - primary, - primary_sender: None, - bft: OnceLock::new(), - handles: Default::default(), - }; + let test_validator = + TestValidator { id: id as u16, primary, bft: OnceLock::new(), handles: Default::default() }; if let Some(bft) = bft { assert!(test_validator.bft.set(bft).is_ok()); } @@ -214,19 +202,16 @@ impl TestNetwork { // Starts each node in the network. 
pub async fn start(&mut self) { for validator in self.validators.values_mut() { - let (primary_sender, primary_receiver) = init_primary_channels(); - validator.primary_sender = Some(primary_sender.clone()); - // let ledger_service = validator.primary.ledger().clone(); // let sync = BlockSync::new(BlockSyncMode::Gateway, ledger_service); // sync.try_block_sync(validator.primary.gateway()).await.unwrap(); if let Some(bft) = validator.bft.get_mut() { // Setup the channels and start the bft. - bft.run(None, None, primary_sender, primary_receiver).await.unwrap(); + bft.run(None, None).await.unwrap(); } else { // Setup the channels and start the primary. - validator.primary.run(None, None, None, primary_sender, primary_receiver).await.unwrap(); + validator.primary.run(None, None, None).await.unwrap(); } if let Some(interval_ms) = self.config.fire_transmissions { diff --git a/node/bft/tests/common/utils.rs b/node/bft/tests/common/utils.rs index 8d18b20592..c7e1af3db6 100644 --- a/node/bft/tests/common/utils.rs +++ b/node/bft/tests/common/utils.rs @@ -15,11 +15,7 @@ use crate::common::{CurrentNetwork, TranslucentLedgerService, primary}; use snarkos_account::Account; -use snarkos_node_bft::{ - Gateway, - Worker, - helpers::{PrimarySender, Storage}, -}; +use snarkos_node_bft::{Gateway, Primary, Worker, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ @@ -49,8 +45,7 @@ use locktick::parking_lot::RwLock; #[cfg(not(feature = "locktick"))] use parking_lot::RwLock; use rand::Rng; -use tokio::{sync::oneshot, task::JoinHandle, time::sleep}; -use tracing::*; +use tokio::{task::JoinHandle, time::sleep}; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -88,12 +83,7 @@ pub fn initialize_logger(verbosity: u8) { } /// Fires *fake* unconfirmed solutions at the node. 
-pub fn fire_unconfirmed_solutions( - sender: &PrimarySender, - node_id: u16, - interval_ms: u64, -) -> JoinHandle<()> { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +pub fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) -> JoinHandle<()> { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -119,13 +109,8 @@ pub fn fire_unconfirmed_solutions( // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng).await } else { sample(&mut unique_rng).await }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -136,11 +121,10 @@ pub fn fire_unconfirmed_solutions( /// Fires *fake* unconfirmed transactions at the node. pub fn fire_unconfirmed_transactions( - sender: &PrimarySender, + primary: Primary, node_id: u16, interval_ms: u64, ) -> JoinHandle<()> { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -167,13 +151,8 @@ pub fn fire_unconfirmed_transactions( loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. 
- if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. diff --git a/node/bft/tests/gateway_e2e.rs b/node/bft/tests/gateway_e2e.rs index 2ac0b37509..74cdf4896a 100644 --- a/node/bft/tests/gateway_e2e.rs +++ b/node/bft/tests/gateway_e2e.rs @@ -22,13 +22,14 @@ use crate::common::{ test_peer::TestPeer, utils::{sample_gateway, sample_ledger, sample_storage}, }; + use snarkos_account::Account; -use snarkos_node_bft::{Gateway, helpers::init_primary_channels}; +use snarkos_node_bft::{Gateway, gateway::test_helpers::DummyGatewayPrimaryCallback}; use snarkos_node_bft_events::{ChallengeRequest, ChallengeResponse, Disconnect, DisconnectReason, Event, WorkerPing}; use snarkos_node_tcp::P2P; use snarkvm::{ledger::narwhal::Data, prelude::TestRng}; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use deadline::deadline; use rand::Rng; @@ -43,9 +44,7 @@ async fn new_test_gateway( let gateway = sample_gateway(accounts[0].clone(), storage, ledger); // Set up primary channels, we discard the rx as we're testing the gateway sans BFT. - let (primary_tx, _primary_rx) = init_primary_channels(); - - gateway.run(primary_tx, [].into(), None).await; + gateway.run([].into(), Arc::new(DummyGatewayPrimaryCallback::default()), None).await; (accounts, gateway) } diff --git a/node/bft/tests/narwhal_e2e.rs b/node/bft/tests/narwhal_e2e.rs index 63c803767d..c202d40fa7 100644 --- a/node/bft/tests/narwhal_e2e.rs +++ b/node/bft/tests/narwhal_e2e.rs @@ -38,7 +38,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -62,7 +63,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. 
log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -114,7 +116,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -144,7 +147,8 @@ async fn test_storage_coherence() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index 520e7c9bd2..3b9e5eb071 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml @@ -21,14 +21,12 @@ default = [ ] locktick = [ "dep:locktick", "snarkos-node-bft/locktick", - "snarkos-node-bft-ledger-service/locktick", - "snarkos-node-bft-storage-service/locktick", "snarkvm/locktick" ] metrics = [ "dep:snarkos-node-metrics" ] telemetry = [ "snarkos-node-bft/telemetry" ] -cuda = [ "snarkvm/cuda", "snarkos-account/cuda", "snarkos-node-bft-ledger-service/cuda" ] -serial = [ "snarkos-node-bft-ledger-service/serial" ] +cuda = [ "snarkvm/cuda", "snarkos-account/cuda" ] +serial = [ ] [dependencies.async-trait] workspace = true @@ -66,14 +64,7 @@ workspace = true [dependencies.snarkos-node-bft] workspace = true - -[dependencies.snarkos-node-bft-ledger-service] -workspace = true -features = [ "ledger", "ledger-write" ] - -[dependencies.snarkos-node-bft-storage-service] -workspace = true -features = [ "persistent" ] +features = [ "persistent-storage" ] [dependencies.snarkos-node-sync] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 4a2bb3c17e..a2a6d40f70 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -30,11 +30,11 @@ use snarkos_node_bft::{ BftCallback, MAX_BATCH_DELAY_IN_MS, Primary, - helpers::{PrimarySender, 
Storage as NarwhalStorage, fmt_id, init_primary_channels}, + helpers::{Storage as NarwhalStorage, fmt_id}, + ledger_service::LedgerService, spawn_blocking, + storage_service::BFTPersistentStorage, }; -use snarkos_node_bft_ledger_service::LedgerService; -use snarkos_node_bft_storage_service::BFTPersistentStorage; use snarkos_node_sync::{BlockSync, Ping}; use snarkvm::{ @@ -56,7 +56,7 @@ use lru::LruCache; #[cfg(not(feature = "locktick"))] use parking_lot::{Mutex, RwLock}; use std::{future::Future, net::SocketAddr, num::NonZeroUsize, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle}; +use tokio::task::JoinHandle; #[cfg(feature = "metrics")] use std::collections::HashMap; @@ -86,8 +86,6 @@ pub struct Consensus { ledger: Arc>, /// The BFT. bft: BFT, - /// The primary sender. - primary_sender: PrimarySender, /// The unconfirmed solutions queue. solutions_queue: Arc, Solution>>>, /// The unconfirmed transactions queue. @@ -119,21 +117,19 @@ impl Consensus { ping: Arc>, dev: Option, ) -> Result { - // Initialize the primary channels. - let (primary_sender, primary_receiver) = init_primary_channels::(); // Initialize the Narwhal transmissions. let transmissions = Arc::new(BFTPersistentStorage::open(storage_mode.clone())?); // Initialize the Narwhal storage. let storage = NarwhalStorage::new(ledger.clone(), transmissions, BatchHeader::::MAX_GC_ROUNDS as u64); // Initialize the BFT. let bft = - BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev)?; + BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev) + .await?; // Create a new instance of Consensus. 
let mut _self = Self { ledger, bft, block_sync, - primary_sender, solutions_queue: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(CAPACITY_FOR_SOLUTIONS).unwrap()))), transactions_queue: Default::default(), seen_solutions: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(1 << 16).unwrap()))), @@ -148,10 +144,7 @@ impl Consensus { _self.start_handlers(); // Lastly, also start BFTs handlers. - _self - .bft - .run(Some(ping), Some(Arc::new(_self.clone())), _self.primary_sender.clone(), primary_receiver) - .await?; + _self.bft.run(Some(ping), Some(Arc::new(_self.clone()))).await?; Ok(_self) } @@ -331,7 +324,7 @@ impl Consensus { let solution_id = solution.id(); trace!("Adding unconfirmed solution '{}' to the memory pool...", fmt_id(solution_id)); // Send the unconfirmed solution to the primary. - if let Err(e) = self.primary_sender.send_unconfirmed_solution(solution_id, Data::Object(solution)).await { + if let Err(e) = self.bft.primary().process_unconfirmed_solution(solution_id, Data::Object(solution)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { // If error occurs after the first 10 blocks of the epoch, log it as a warning, otherwise ignore. @@ -431,7 +424,7 @@ impl Consensus { trace!("Adding unconfirmed {tx_type_str} transaction '{}' to the memory pool...", fmt_id(transaction_id)); // Send the unconfirmed transaction to the primary. if let Err(e) = - self.primary_sender.send_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await + self.bft.primary().process_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { @@ -606,23 +599,19 @@ impl Consensus { transmission_id: TransmissionID, transmission: Transmission, ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the transmission to the primary. 
match (transmission_id, transmission) { - (TransmissionID::Ratification, Transmission::Ratification) => return Ok(()), + (TransmissionID::Ratification, Transmission::Ratification) => Ok(()), (TransmissionID::Solution(solution_id, _), Transmission::Solution(solution)) => { // Send the solution to the primary. - self.primary_sender.tx_unconfirmed_solution.send((solution_id, solution, callback)).await?; + self.bft.primary().process_unconfirmed_solution(solution_id, solution).await } (TransmissionID::Transaction(transaction_id, _), Transmission::Transaction(transaction)) => { // Send the transaction to the primary. - self.primary_sender.tx_unconfirmed_transaction.send((transaction_id, transaction, callback)).await?; + self.bft.primary().process_unconfirmed_transaction(transaction_id, transaction).await } _ => bail!("Mismatching `(transmission_id, transmission)` pair in consensus"), } - // Await the callback. - callback_receiver.await? } /// Spawns a task with the given future; it should only be used for long-running tasks. 
From 1b54de0314548b68f12c7290e8429c4f23d2754d Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Wed, 17 Sep 2025 00:19:38 -0700 Subject: [PATCH 05/16] fix(node/bft): ensure gateways are shut down during prod tests --- node/bft/src/bft.rs | 11 ++++-- node/bft/src/gateway.rs | 35 ++++++++++++------- node/bft/src/primary.rs | 59 ++++++++++++++++++++++++-------- node/bft/tests/bft_e2e.rs | 2 ++ node/bft/tests/common/primary.rs | 6 ++-- node/tests/common/node.rs | 20 +++++++---- 6 files changed, 94 insertions(+), 39 deletions(-) diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 49a4138899..a059ec9f10 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -897,7 +897,10 @@ mod tests { use aleo_std::StorageMode; use anyhow::Result; use indexmap::{IndexMap, IndexSet}; - use std::sync::Arc; + use std::{ + net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + sync::Arc, + }; type CurrentNetwork = snarkvm::console::network::MainnetV0; @@ -932,13 +935,17 @@ mod tests { ) -> anyhow::Result> { // Create the block synchronization logic. let block_sync = Arc::new(BlockSync::new(ledger.clone())); + + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the BFT. BFT::new( account.clone(), storage.clone(), ledger.clone(), block_sync, - None, + Some(any_addr), &[], StorageMode::new_test(None), None, diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index ca8e18db45..a161cd4d9d 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -203,8 +203,14 @@ impl Gateway { (None, None) => SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, MEMORY_POOL_PORT)), (Some(ip), _) => ip, }; + + // Allow at most as many connections as the maximum committe size. + // and fail if the chosen port is not available. + let mut tcp_config = Config::new(ip, Committee::::max_committee_size()?); + tcp_config.allow_random_port = false; + // Initialize the TCP stack. 
- let tcp = Tcp::new(Config::new(ip, Committee::::max_committee_size()?)); + let tcp = Tcp::new(tcp_config); // Return the gateway. Ok(Self { @@ -1690,17 +1696,11 @@ mod prop_tests { impl GatewayAddress { fn ip(&self) -> Option { - if let GatewayAddress::Prod(ip) = self { - return *ip; - } - None + if let GatewayAddress::Prod(ip) = self { *ip } else { None } } fn port(&self) -> Option { - if let GatewayAddress::Dev(port) = self { - return Some(*port as u16); - } - None + if let GatewayAddress::Dev(port) = self { Some(*port as u16) } else { None } } } @@ -1757,8 +1757,8 @@ mod prop_tests { .boxed() } - #[proptest] - fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1772,10 +1772,13 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } - #[proptest] - fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1794,6 +1797,9 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. 
+ gateway.shut_down().await; } #[proptest(async = "tokio")] @@ -1839,6 +1845,9 @@ mod prop_tests { SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) ); assert_eq!(gateway.num_workers(), workers.len() as u8); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } #[proptest] diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 02af82f109..4dcd99bff7 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -43,15 +43,16 @@ use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_sync::{BlockSync, DUMMY_SELF_IP, Ping}; use snarkvm::{ console::{ + network::ConsensusVersion, prelude::*, types::{Address, Field}, }, ledger::{ block::Transaction, + committee::Committee, narwhal::{BatchCertificate, BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - prelude::{ConsensusVersion, committee::Committee}, }; use aleo_std::StorageMode; @@ -127,7 +128,7 @@ impl Primary { /// The maximum number of unconfirmed transmissions to send to the primary. pub const MAX_TRANSMISSIONS_TOLERANCE: usize = BatchHeader::::MAX_TRANSMISSIONS_PER_BATCH * 2; - /// Initializes a new primary instance. + /// Initializes a new primary instance and starts the gateway. #[allow(clippy::too_many_arguments)] pub async fn new( account: Account, @@ -676,15 +677,17 @@ impl Primary { // Prepare the previous batch certificate IDs. let previous_certificate_ids = previous_certificates.into_iter().map(|c| c.id()).collect(); // Sign the batch header and construct the proposal. 
- let (batch_header, proposal) = spawn_blocking!(BatchHeader::new( - &private_key, - round, - current_timestamp, - committee_id, - transmission_ids, - previous_certificate_ids, - &mut rand::thread_rng() - )) + let (batch_header, proposal) = spawn_blocking!({ + BatchHeader::new( + &private_key, + round, + current_timestamp, + committee_id, + transmission_ids, + previous_certificate_ids, + &mut rand::thread_rng(), + ) + }) .and_then(|batch_header| { Proposal::new(committee_lookback, batch_header.clone(), transmissions.clone()) .map(|proposal| (batch_header, proposal)) @@ -848,7 +851,7 @@ impl Primary { // Ensure the batch header from the peer is valid. let (storage, header) = (self.storage.clone(), batch_header.clone()); let missing_transmissions = - spawn_blocking!(storage.check_batch_header(&header, missing_transmissions, Default::default()))?; + spawn_blocking!({ storage.check_batch_header(&header, missing_transmissions, Default::default()) })?; // Inserts the missing transmissions into the workers. self.insert_missing_transmissions_into_workers(peer_ip, missing_transmissions.into_iter())?; @@ -1546,7 +1549,7 @@ impl Primary { if !self.storage.contains_certificate(certificate.id()) { // Store the batch certificate. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, missing_transmissions, Default::default()))?; + spawn_blocking!({ storage.insert_certificate(certificate_, missing_transmissions, Default::default()) })?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. if let Some(cb) = self.primary_callback.get() { @@ -1763,6 +1766,8 @@ impl Primary { info!("Shutting down the primary..."); // Remove the callback. self.primary_callback.clear(); + // Stop syncing. + self.sync.shut_down().await; // Shut down the workers. 
self.workers.iter().for_each(|worker| worker.shut_down()); // Abort the tasks. @@ -1917,6 +1922,8 @@ impl Primary { #[cfg(test)] mod tests { + use std::net::{Ipv4Addr, SocketAddrV4}; + use super::*; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -1962,11 +1969,16 @@ mod tests { let ledger = Arc::new(MockLedgerService::new_at_height(committee, height)); let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 10); + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the primary. let account = accounts[account_index].1.clone(); let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut primary = - Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).await.unwrap(); + Primary::new(account, storage, ledger, block_sync, Some(any_addr), &[], StorageMode::Test(None), None) + .await + .unwrap(); // Construct a worker instance. 
primary.workers = Arc::from([Worker::new( @@ -2192,6 +2204,7 @@ mod tests { } } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch() { let mut rng = TestRng::default(); @@ -2213,6 +2226,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_no_transmissions() { let mut rng = TestRng::default(); @@ -2226,6 +2240,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_in_round() { let round = 3; @@ -2251,6 +2266,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_skip_transmissions_from_previous_certificates() { let round = 3; @@ -2323,6 +2339,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_over_spend_limit() { let mut rng = TestRng::default(); @@ -2360,6 +2377,7 @@ mod tests { assert_eq!(primary.workers().iter().map(|worker| worker.transmissions().len()).sum::(), 3); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer() { let mut rng = TestRng::default(); @@ -2399,6 +2417,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_when_not_synced() { let mut rng = TestRng::default(); @@ -2436,6 +2455,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round() { let round = 2; @@ -2476,6 +2496,7 @@ mod tests { primary.process_batch_propose_from_peer(peer_ip, (*proposal.batch_header()).clone().into()).await.unwrap(); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_wrong_round() { let mut rng = TestRng::default(); @@ -2518,6 +2539,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round_wrong_round() { 
let round = 4; @@ -2564,6 +2586,7 @@ mod tests { } /// Tests that the minimum batch delay is enforced as expected, i.e., that proposals with timestamps that are too close to the previous proposal are rejected. + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_with_past_timestamp() { let round = 2; @@ -2614,6 +2637,7 @@ mod tests { } /// Check that proposals rejected that have timestamps older than the previous proposal. + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_over_spend_limit() { let mut rng = TestRng::default(); @@ -2679,6 +2703,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal_lock() { let round = 3; @@ -2712,6 +2737,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal() { let round = 5; @@ -2742,6 +2768,7 @@ mod tests { assert!(primary.proposed_batch.read().as_ref().unwrap().round() > primary.current_round()); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer() { let mut rng = TestRng::default(); @@ -2778,6 +2805,7 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer_in_round() { let round = 5; @@ -2817,6 +2845,7 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_no_quorum() { let mut rng = TestRng::default(); @@ -2852,6 +2881,7 @@ mod tests { assert_eq!(primary.current_round(), round); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_in_round_no_quorum() { let round = 7; @@ -2890,6 +2920,7 @@ mod tests { assert_eq!(primary.current_round(), round); } + 
#[tracing_test::traced_test] #[tokio::test] async fn test_insert_certificate_with_aborted_transmissions() { let round = 3; diff --git a/node/bft/tests/bft_e2e.rs b/node/bft/tests/bft_e2e.rs index ceb515065f..042110c60f 100644 --- a/node/bft/tests/bft_e2e.rs +++ b/node/bft/tests/bft_e2e.rs @@ -95,6 +95,7 @@ async fn test_resync() { deadline!(Duration::from_secs(20), move || { network_clone.is_round_reached(RECOVERY_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_threshold() { // Start N nodes but don't connect them. @@ -149,6 +150,7 @@ async fn test_quorum_threshold() { deadline!(Duration::from_secs(20), move || { network.is_round_reached(TARGET_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_break() { // Start N nodes, connect them and start the cannons for each. diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index 082f7db92e..d69785b72c 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -19,7 +19,7 @@ use crate::common::{ utils::{fire_unconfirmed_solutions, fire_unconfirmed_transactions, initialize_logger}, }; use snarkos_account::Account; -use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Primary, helpers::Storage}; +use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; use snarkvm::{ @@ -164,7 +164,7 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), &[], StorageMode::new_test(None), None, @@ -178,7 +178,7 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), 
&[], StorageMode::new_test(None), None, diff --git a/node/tests/common/node.rs b/node/tests/common/node.rs index e161a703e7..dae081b62d 100644 --- a/node/tests/common/node.rs +++ b/node/tests/common/node.rs @@ -19,12 +19,18 @@ use snarkos_node::{Client, Prover, Validator}; use snarkvm::prelude::{MainnetV0 as CurrentNetwork, store::helpers::memory::ConsensusMemory}; use aleo_std::StorageMode; -use std::str::FromStr; +use std::{ + net::{IpAddr, Ipv4Addr, SocketAddr}, + str::FromStr, +}; + +/// Bind to a random port to avoid conflicts during testing. +const ANY_ADDR: SocketAddr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0); pub async fn client() -> Client> { Client::new( - "127.0.0.1:0".parse().unwrap(), - None, + ANY_ADDR, + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], @@ -41,7 +47,7 @@ pub async fn client() -> Client> pub async fn prover() -> Prover> { Prover::new( - "127.0.0.1:0".parse().unwrap(), + ANY_ADDR, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], sample_genesis_block(), @@ -55,9 +61,9 @@ pub async fn prover() -> Prover> pub async fn validator() -> Validator> { Validator::new( - "127.0.0.1:0".parse().unwrap(), - None, - None, + ANY_ADDR, + Some(ANY_ADDR), + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], From 1c0826191eae1c9ed5af8258229978b8be8ec892 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Tue, 9 Sep 2025 15:23:51 -0600 Subject: [PATCH 06/16] fix(node/router): don't panic if peer upgrade fails --- node/router/src/handshake.rs | 4 +++- node/router/src/helpers/peer.rs | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/node/router/src/handshake.rs b/node/router/src/handshake.rs index 4f7c94d736..9b7804e445 100644 --- a/node/router/src/handshake.rs +++ b/node/router/src/handshake.rs @@ -131,7 +131,9 @@ impl Router { if let 
Some(addr) = listener_addr { if let Ok(ref challenge_request) = handshake_result { if let Some(peer) = self.peer_pool.write().get_mut(&addr) { - peer.upgrade_to_connected(peer_addr, challenge_request, self.clone()); + if let Err(err) = peer.upgrade_to_connected(peer_addr, challenge_request, self.clone()) { + warn!("Failed to upgrade peer to `connected`: {err}"); + } } #[cfg(feature = "metrics")] self.update_metrics(); diff --git a/node/router/src/helpers/peer.rs b/node/router/src/helpers/peer.rs index a05c1d3173..e90f8a0afe 100644 --- a/node/router/src/helpers/peer.rs +++ b/node/router/src/helpers/peer.rs @@ -16,6 +16,7 @@ use crate::{NodeType, Router, messages::ChallengeRequest}; use snarkvm::prelude::{Address, Network}; +use anyhow::{Result, ensure}; use std::{net::SocketAddr, time::Instant}; /// A peer of any connection status. @@ -84,9 +85,14 @@ impl Peer { } /// Promote a connecting peer to a fully connected one. - pub fn upgrade_to_connected(&mut self, connected_addr: SocketAddr, cr: &ChallengeRequest, router: Router) { + pub fn upgrade_to_connected( + &mut self, + connected_addr: SocketAddr, + cr: &ChallengeRequest, + router: Router, + ) -> Result<()> { // Logic check: this can only happen during the handshake. - assert!(matches!(self, Self::Connecting(_))); + ensure!(matches!(self, Self::Connecting(_)), "Peer is not in `connecting` state"); let timestamp = Instant::now(); let listener_addr = SocketAddr::from((connected_addr.ip(), cr.listener_port)); @@ -106,6 +112,8 @@ impl Peer { last_seen: timestamp, router, }); + + Ok(()) } /// Demote a peer to candidate status, marking it as disconnected. 
From e83cc17dc8571838d5c9548f4497f96092076de1 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Wed, 10 Sep 2025 09:51:39 -0700 Subject: [PATCH 07/16] log(node/bft): add context for errors while fetching transmission from peers --- node/bft/src/worker.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index df8b9d6aad..3098c808d2 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -32,6 +32,7 @@ use snarkvm::{ }, }; +use anyhow::Context; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -498,12 +499,12 @@ impl Worker { ); } // Wait for the transmission to be fetched. - match timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await { - // If the transmission was fetched, return it. - Ok(result) => Ok((transmission_id, result?)), - // If the transmission was not fetched, return an error. - Err(e) => bail!("Unable to fetch transmission - (timeout) {e}"), - } + let transmission = timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver) + .await + .with_context(|| "Unable to fetch transmission from peer - (timeout)")? + .with_context(|| "Unable to fetch transmission from peer")?; + + Ok((transmission_id, transmission)) } /// Handles the incoming transmission response. 
From 6ba5f1d1aeddf29ae2ca64be2e9fef0a9f1e29df Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Tue, 9 Sep 2025 21:06:37 -0700 Subject: [PATCH 08/16] fix(node/bft): propagate panics for blocking tasks --- Cargo.lock | 128 +++++++++++++++++++------------------- Cargo.toml | 3 +- node/Cargo.toml | 1 + node/bft/Cargo.toml | 2 +- node/bft/src/gateway.rs | 39 +++++------- node/bft/src/lib.rs | 11 ---- node/bft/src/primary.rs | 54 ++++++++-------- node/bft/src/sync/mod.rs | 13 ++-- node/bft/src/worker.rs | 37 +++++------ node/consensus/Cargo.toml | 1 + node/consensus/src/lib.rs | 7 +-- node/src/client/mod.rs | 34 +++++----- node/src/lib.rs | 71 --------------------- node/src/prover/mod.rs | 10 ++- node/src/validator/mod.rs | 44 +++++++------ 15 files changed, 182 insertions(+), 273 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87cd5b67e8..d247170215 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1786,7 +1786,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2 0.5.10", "system-configuration", "tokio", "tower-service", @@ -2913,7 +2913,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.6.0", + "socket2 0.5.10", "thiserror 2.0.16", "tokio", "tracing", @@ -2950,7 +2950,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.0", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] @@ -4149,7 +4149,7 @@ dependencies = [ [[package]] name = "snarkvm" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anstyle", "anyhow", @@ -4174,7 +4174,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4202,7 +4202,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms-cuda" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "blst", "cc", @@ -4213,7 +4213,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4227,7 +4227,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-network", "snarkvm-circuit-types", @@ -4237,7 +4237,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-types", "snarkvm-console-algorithms", @@ -4247,7 +4247,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-collections" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-types", @@ -4257,7 +4257,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "indexmap 2.11.3", "itertools 0.14.0", @@ -4275,12 +4275,12 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment-witness" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" [[package]] name = "snarkvm-circuit-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-collections", @@ -4291,7 +4291,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4305,7 +4305,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types" version = "4.2.1" 
-source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-address", @@ -4320,7 +4320,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4333,7 +4333,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-console-types-boolean", @@ -4342,7 +4342,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4352,7 +4352,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" 
dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4364,7 +4364,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4376,7 +4376,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4387,7 +4387,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4399,7 +4399,7 @@ dependencies = [ [[package]] name = "snarkvm-console" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-account", "snarkvm-console-algorithms", @@ -4412,7 +4412,7 @@ dependencies = [ [[package]] name = "snarkvm-console-account" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "bs58", "snarkvm-console-network", @@ -4423,7 +4423,7 @@ dependencies = [ [[package]] name = "snarkvm-console-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "blake2s_simd", "smallvec", @@ -4436,7 +4436,7 @@ dependencies = [ [[package]] name = "snarkvm-console-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "rayon", @@ -4447,7 +4447,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "enum-iterator", @@ -4467,7 +4467,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "bech32", @@ -4485,7 +4485,7 @@ dependencies = [ [[package]] name = "snarkvm-console-program" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "enum-iterator", "enum_index", @@ -4505,7 +4505,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-address", @@ -4520,7 +4520,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4531,7 +4531,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", ] @@ -4539,7 +4539,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", 
"snarkvm-console-types-boolean", @@ -4549,7 +4549,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4560,7 +4560,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4571,7 +4571,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4582,7 +4582,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4593,7 +4593,7 @@ dependencies = [ [[package]] name = "snarkvm-curves" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "rand 0.8.5", "rayon", @@ -4607,7 +4607,7 @@ dependencies = [ [[package]] name = "snarkvm-fields" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4624,7 +4624,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4651,7 +4651,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-authority" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "rand 0.8.5", @@ -4663,7 +4663,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-block" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "indexmap 2.11.3", @@ -4685,7 +4685,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-committee" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "indexmap 2.11.3", @@ -4704,7 +4704,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-ledger-narwhal-batch-certificate", "snarkvm-ledger-narwhal-batch-header", @@ -4717,7 +4717,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-certificate" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4730,7 +4730,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-header" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4743,7 +4743,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-data" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "bytes", "serde_json", @@ -4754,7 +4754,7 @@ dependencies = [ [[package]] 
name = "snarkvm-ledger-narwhal-subdag" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4769,7 +4769,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "bytes", "serde_json", @@ -4782,7 +4782,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission-id" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "snarkvm-console", "snarkvm-ledger-puzzle", @@ -4791,7 +4791,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4811,7 +4811,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle-epoch" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4834,7 +4834,7 @@ dependencies = [ 
[[package]] name = "snarkvm-ledger-query" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "anyhow", "async-trait", @@ -4851,7 +4851,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-store" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std-storage", "anyhow", @@ -4878,7 +4878,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-test-helpers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4896,7 +4896,7 @@ dependencies = [ [[package]] name = "snarkvm-metrics" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "metrics", ] @@ -4904,7 +4904,7 @@ dependencies = [ [[package]] name = "snarkvm-parameters" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4927,7 +4927,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer" version = "4.2.1" -source 
= "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", @@ -4960,7 +4960,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-process" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "colored 3.0.0", @@ -4985,7 +4985,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "indexmap 2.11.3", "paste", @@ -5003,7 +5003,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-snark" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "bincode", "serde_json", @@ -5016,11 +5016,12 @@ dependencies = [ [[package]] name = "snarkvm-utilities" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "aleo-std", "anyhow", "bincode", + "colored 3.0.0", "num-bigint", "num_cpus", "rand 0.8.5", @@ -5031,6 +5032,7 @@ dependencies = [ "smol_str", 
"snarkvm-utilities-derives", "thiserror 2.0.16", + "tokio", "tracing", "zeroize", ] @@ -5038,7 +5040,7 @@ dependencies = [ [[package]] name = "snarkvm-utilities-derives" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" dependencies = [ "proc-macro2", "quote 1.0.40", diff --git a/Cargo.toml b/Cargo.toml index 2c332825c1..3e99b33c28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,8 @@ default-features = false [workspace.dependencies.snarkvm] #path = "../snarkVM" git = "https://github.com/ProvableHQ/snarkVM.git" -rev = "35c82646eeb2e9561be" +#rev = "35c82646eeb2e9561be" +branch = "feat/track-error" #version = "=4.2.1" default-features = false #features = [ "circuit", "console", "rocks" ] diff --git a/node/Cargo.toml b/node/Cargo.toml index 330b2a8a8b..4929c51d4f 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -120,6 +120,7 @@ workspace = true [dependencies.snarkvm] workspace = true +features = [ "utilities", "async" ] [dependencies.time] workspace = true diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 1d5015b33c..12babc736c 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -123,7 +123,7 @@ workspace = true [dependencies.snarkvm] workspace = true -features = [ "utilities" ] +features = [ "utilities", "async" ] [dependencies.time] workspace = true diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index a161cd4d9d..d8d0abfa24 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -22,7 +22,6 @@ use crate::{ Worker, events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, helpers::{Cache, CallbackHandle, Resolver, Storage, WorkerSender, assign_to_worker}, - spawn_blocking, }; use snarkos_account::Account; use snarkos_node_bft_events::{ @@ -61,6 +60,7 @@ use snarkvm::{ 
narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, + utilities::task::{self, JoinHandle}, }; use colored::Colorize; @@ -84,7 +84,6 @@ use std::{ use tokio::{ net::TcpStream, sync::{OnceCell, oneshot}, - task::{self, JoinHandle}, }; use tokio_stream::StreamExt; use tokio_util::codec::Framed; @@ -325,7 +324,7 @@ impl CommunicationService for Gateway { let tcp = self.tcp().clone(); tcp.banned_peers().update_ip_ban(peer_ip.ip()); - tokio::spawn(async move { + task::spawn(async move { tcp.disconnect(peer_ip).await; }); } @@ -479,7 +478,7 @@ impl Gateway { } let self_ = self.clone(); - Some(tokio::spawn(async move { + Some(task::spawn(async move { debug!("Connecting to validator {peer_ip}..."); // Attempt to connect to the peer. if let Err(error) = self_.tcp.connect(peer_ip).await { @@ -707,22 +706,17 @@ impl Gateway { } let self_ = self.clone(); - let blocks = match task::spawn_blocking(move || { + let blocks = task::spawn_blocking(move || { // Retrieve the blocks within the requested range. match self_.ledger.get_blocks(start_height..end_height) { Ok(blocks) => Ok(Data::Object(DataBlocks(blocks))), Err(error) => bail!("Missing blocks {start_height} to {end_height} from ledger - {error}"), } }) - .await - { - Ok(Ok(blocks)) => blocks, - Ok(Err(error)) => return Err(error), - Err(error) => return Err(anyhow!("[BlockRequest] {error}")), - }; + .await?; let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Send the `BlockResponse` message to the peer. let event = Event::BlockResponse(BlockResponse { request: block_request, blocks }); Transport::send(&self_, peer_ip, event).await; @@ -853,7 +847,7 @@ impl Gateway { connected_peers.shuffle(&mut rand::thread_rng()); let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Initialize the validators. let mut validators = IndexMap::with_capacity(MAX_VALIDATORS_TO_SEND); // Iterate over the validators. 
@@ -885,7 +879,7 @@ impl Gateway { if self.number_of_connected_peers() < MIN_CONNECTED_VALIDATORS { // Attempt to connect to any validators that are not already connected. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { for (validator_ip, validator_address) in validators { if self_.dev.is_some() { // Ensure the validator IP is not this node. @@ -951,7 +945,7 @@ impl Gateway { /// Disconnects from the given peer IP, if the peer is connected. pub fn disconnect(&self, peer_ip: SocketAddr) -> JoinHandle<()> { let gateway = self.clone(); - tokio::spawn(async move { + task::spawn(async move { if let Some(peer_addr) = gateway.resolver.get_ambiguous(peer_ip) { // Disconnect from this peer. let _disconnected = gateway.tcp.disconnect(peer_addr).await; @@ -977,9 +971,8 @@ impl Gateway { } /// Spawns a task with the given future; it should only be used for long-running tasks. - #[allow(dead_code)] fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the gateway. @@ -1086,7 +1079,7 @@ impl Gateway { /// This function attempts to disconnect any validators that are not in the current committee. fn handle_unauthorized_validators(&self) { let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Retrieve the connected validators. let validators = self_.connected_peers().read().clone(); // Iterate over the validator IPs. @@ -1116,7 +1109,7 @@ impl Gateway { // Select a random validator IP. if let Some(validator_ip) = validators.into_iter().choose(&mut rand::thread_rng()) { let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Increment the number of outbound validators requests for this validator. self_.cache.increment_outbound_validators_requests(validator_ip); // Send a `ValidatorsRequest` to the validator. 
@@ -1133,7 +1126,7 @@ impl Gateway { if let Some(peer_ip) = self.resolver.get_listener(peer_addr) { warn!("{CONTEXT} Disconnecting from '{peer_ip}' - {error}"); let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { Transport::send(&self_, peer_ip, DisconnectReason::ProtocolViolation.into()).await; // Disconnect from this peer. self_.disconnect(peer_ip); @@ -1205,7 +1198,7 @@ impl Transport for Gateway { if self.number_of_connected_peers() > 0 { let self_ = self.clone(); let connected_peers = self.connected_peers.read().clone(); - tokio::spawn(async move { + task::spawn(async move { // Iterate through all connected peers. for peer_ip in connected_peers { // Send the event to the peer. @@ -1240,7 +1233,7 @@ impl Reading for Gateway { let self_ = self.clone(); // Handle BlockRequest and BlockResponse messages in a separate task to not block the // inbound queue. - tokio::spawn(async move { + task::spawn(async move { self_.process_message_inner(peer_addr, message).await; }); } else { @@ -1587,7 +1580,7 @@ impl Gateway { return Some(DisconnectReason::InvalidChallengeResponse); } // Perform the deferred non-blocking deserialization of the signature. - let Ok(signature) = spawn_blocking!(signature.deserialize_blocking()) else { + let Ok(signature) = task::spawn_blocking(|| signature.deserialize_blocking()).await else { warn!("{CONTEXT} Gateway handshake with '{peer_addr}' failed (cannot deserialize the signature)"); return Some(DisconnectReason::InvalidChallengeResponse); }; diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index 33dbb08775..19aaab9c6b 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -69,14 +69,3 @@ pub const MAX_WORKERS: u8 = 1; // worker(s) pub const PRIMARY_PING_IN_MS: u64 = 2 * MAX_BATCH_DELAY_IN_MS; // ms /// The interval at which each worker broadcasts a ping to every other node. pub const WORKER_PING_IN_MS: u64 = 4 * MAX_BATCH_DELAY_IN_MS; // ms - -/// A helper macro to spawn a blocking task. 
-#[macro_export] -macro_rules! spawn_blocking { - ($expr:expr) => { - match tokio::task::spawn_blocking(move || $expr).await { - Ok(value) => value, - Err(error) => Err(anyhow::anyhow!("[tokio::spawn_blocking] {error}")), - } - }; -} diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 4dcd99bff7..e37074f7fb 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -34,7 +34,6 @@ use crate::{ init_worker_channels, now, }, - spawn_blocking, sync::{Sync, SyncCallback}, }; use snarkos_account::Account; @@ -53,6 +52,7 @@ use snarkvm::{ narwhal::{BatchCertificate, BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, + utilities::spawn_blocking, }; use aleo_std::StorageMode; @@ -571,14 +571,13 @@ impl Primary { } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) - } + let transaction = spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. // ConsensusVersion V8 Migration logic - @@ -677,7 +676,7 @@ impl Primary { // Prepare the previous batch certificate IDs. let previous_certificate_ids = previous_certificates.into_iter().map(|c| c.id()).collect(); // Sign the batch header and construct the proposal. 
- let (batch_header, proposal) = spawn_blocking!({ + let (batch_header, proposal) = spawn_blocking(move || { BatchHeader::new( &private_key, round, @@ -688,6 +687,7 @@ impl Primary { &mut rand::thread_rng(), ) }) + .await .and_then(|batch_header| { Proposal::new(committee_lookback, batch_header.clone(), transmissions.clone()) .map(|proposal| (batch_header, proposal)) @@ -720,7 +720,7 @@ impl Primary { let BatchPropose { round: batch_round, batch_header } = batch_propose; // Deserialize the batch header. - let batch_header = spawn_blocking!(batch_header.deserialize_blocking())?; + let batch_header = spawn_blocking(|| batch_header.deserialize_blocking()).await?; // Ensure the round matches in the batch header. if batch_round != batch_header.round() { // Proceed to disconnect the validator. @@ -851,7 +851,8 @@ impl Primary { // Ensure the batch header from the peer is valid. let (storage, header) = (self.storage.clone(), batch_header.clone()); let missing_transmissions = - spawn_blocking!({ storage.check_batch_header(&header, missing_transmissions, Default::default()) })?; + spawn_blocking(move || storage.check_batch_header(&header, missing_transmissions, Default::default())) + .await?; // Inserts the missing transmissions into the workers. self.insert_missing_transmissions_into_workers(peer_ip, missing_transmissions.into_iter())?; @@ -876,14 +877,13 @@ impl Primary { (transmission_id, transmission) { // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) 
- } + let transaction = spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. // ConsensusVersion V8 Migration logic - @@ -944,7 +944,7 @@ impl Primary { let batch_id = batch_header.batch_id(); // Sign the batch ID. let account = self.gateway.account().clone(); - let signature = spawn_blocking!(account.sign(&[batch_id], &mut rand::thread_rng()))?; + let signature = spawn_blocking(move || account.sign(&[batch_id], &mut rand::thread_rng())).await?; // Ensure the proposal has not already been signed. // @@ -1017,7 +1017,7 @@ impl Primary { } let self_ = self.clone(); - let Some(proposal) = spawn_blocking!({ + let Some(proposal) = spawn_blocking(move || { // Acquire the write lock. let mut proposed_batch = self_.proposed_batch.write(); // Add the signature to the batch, and determine if the batch is ready to be certified. @@ -1065,7 +1065,7 @@ impl Primary { Some(proposal) => Ok(Some(proposal)), None => Ok(None), } - })? + }).await? else { return Ok(()); }; @@ -1202,7 +1202,7 @@ impl Primary { // Retrieve the block locators. let self__ = self_.clone(); - let block_locators = match spawn_blocking!(self__.sync.get_block_locators()) { + let block_locators = match spawn_blocking(move || self__.sync.get_block_locators()).await { Ok(block_locators) => block_locators, Err(e) => { warn!("Failed to retrieve block locators - {e}"); @@ -1463,7 +1463,7 @@ impl Primary { let transmissions = transmissions.into_iter().collect::>(); // Store the certified batch. 
let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, transmissions, Default::default()))?; + spawn_blocking(move || storage.insert_certificate(certificate_, transmissions, Default::default())).await?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. if let Some(cb) = self.primary_callback.get() { @@ -1549,7 +1549,8 @@ impl Primary { if !self.storage.contains_certificate(certificate.id()) { // Store the batch certificate. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!({ storage.insert_certificate(certificate_, missing_transmissions, Default::default()) })?; + spawn_blocking(move || storage.insert_certificate(certificate_, missing_transmissions, Default::default())) + .await?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. if let Some(cb) = self.primary_callback.get() { @@ -1805,7 +1806,8 @@ impl GatewayPrimaryCallback for Primary { let self_ = self.clone(); tokio::spawn(async move { // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) else { + let Ok(primary_certificate) = spawn_blocking(|| primary_certificate.deserialize_blocking()).await + else { warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); return; }; @@ -1865,7 +1867,7 @@ impl GatewayPrimaryCallback for Primary { let self_ = self.clone(); tokio::spawn(async move { // Deserialize the batch certificate. 
- let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { + let Ok(batch_certificate) = spawn_blocking(|| batch_certificate.deserialize_blocking()).await else { warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); return; }; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 9ceb8a4516..7c4ca78fcb 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -19,8 +19,8 @@ use crate::{ events::DataBlocks, gateway::{Gateway, GatewaySyncCallback, Transport}, helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, - spawn_blocking, }; + use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncRequest, locators::BlockLocators}; @@ -28,6 +28,7 @@ use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, prelude::{cfg_into_iter, cfg_iter}, + utilities::spawn_blocking, }; use anyhow::{Context, Result, anyhow, bail}; @@ -219,10 +220,10 @@ impl Sync { // Remove the expired pending transmission requests. let self__ = self_.clone(); - let _ = spawn_blocking!({ + spawn_blocking(move || { self__.pending.clear_expired_callbacks(); - Ok(()) - }); + }) + .await; } }); @@ -565,7 +566,7 @@ impl Sync { let _lock = self.sync_lock.lock().await; let self_ = self.clone(); - tokio::task::spawn_blocking(move || { + spawn_blocking(move || { // Check the next block. self_.ledger.check_next_block(&block)?; // Attempt to advance to the next block. @@ -580,7 +581,7 @@ impl Sync { Ok(()) }) - .await? + .await } /// Advances the ledger by the given block and updates the storage accordingly. 
diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 3098c808d2..f1b95402da 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -20,7 +20,6 @@ use crate::{ events::{Event, TransmissionRequest, TransmissionResponse}, gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, - spawn_blocking, }; use snarkos_node_bft_ledger_service::LedgerService; use snarkvm::{ @@ -30,6 +29,7 @@ use snarkvm::{ narwhal::{BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, + utilities::task::{self, JoinHandle}, }; use anyhow::Context; @@ -41,7 +41,7 @@ use locktick::parking_lot::{Mutex, RwLock}; use parking_lot::{Mutex, RwLock}; use rand::seq::IteratorRandom; use std::{future::Future, net::SocketAddr, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle, time::timeout}; +use tokio::{sync::oneshot, time::timeout}; /// A worker's main role is maintaining a queue of verified ("ready") transmissions, /// which will eventually be fetched by the primary when the primary generates a new batch. @@ -268,7 +268,7 @@ impl Worker { } // Attempt to fetch the transmission from the peer. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Send a transmission request to the peer. match self_.send_transmission_request(peer_ip, transmission_id).await { // If the transmission was fetched, then process it. @@ -323,7 +323,7 @@ impl Worker { if tx.is_execute() { let self_ = self.clone(); let tx_ = tx.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = self_.ledger.check_transaction_basic(tx_id, tx_).await; }); } @@ -391,12 +391,11 @@ impl Worker { bail!("Transaction '{}.{}' already exists.", fmt_id(transaction_id), fmt_id(checksum).dimmed()); } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. 
- let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?), - } - })?; + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)), + }) + .await?; // Check that the transaction is well-formed and unique. self.ledger.check_transaction_basic(transaction_id, transaction).await?; @@ -427,10 +426,7 @@ impl Worker { // Remove the expired pending certificate requests. let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.pending.clear_expired_callbacks(); - Ok(()) - }); + task::spawn_blocking(move || self__.pending.clear_expired_callbacks()).await; } }); @@ -456,10 +452,7 @@ impl Worker { while let Some((peer_ip, transmission_response)) = rx_transmission_response.recv().await { // Process the transmission response. let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.finish_transmission_request(peer_ip, transmission_response); - Ok(()) - }); + task::spawn_blocking(move || self__.finish_transmission_request(peer_ip, transmission_response)).await; } }); } @@ -533,7 +526,7 @@ impl Worker { if let Some(transmission) = self.get_transmission(transmission_id) { // Send the transmission response to the peer. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { self_.gateway.send(peer_ip, Event::TransmissionResponse((transmission_id, transmission).into())).await; }); } @@ -541,14 +534,14 @@ impl Worker { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the worker. 
pub(crate) fn shut_down(&self) { trace!("Shutting down worker {}...", self.id); - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + // Abort and discard the tasks. + self.handles.lock().drain(..).for_each(|handle| handle.abort()); } } diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index 3b9e5eb071..96e69f8673 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml @@ -71,6 +71,7 @@ workspace = true [dependencies.snarkvm] workspace = true +features = [ "utilities", "async" ] [dependencies.tokio] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index a2a6d40f70..4a03d5e902 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -32,7 +32,6 @@ use snarkos_node_bft::{ Primary, helpers::{Storage as NarwhalStorage, fmt_id}, ledger_service::LedgerService, - spawn_blocking, storage_service::BFTPersistentStorage, }; use snarkos_node_sync::{BlockSync, Ping}; @@ -44,6 +43,7 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::*, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -56,7 +56,6 @@ use lru::LruCache; #[cfg(not(feature = "locktick"))] use parking_lot::{Mutex, RwLock}; use std::{future::Future, net::SocketAddr, num::NonZeroUsize, sync::Arc, time::Duration}; -use tokio::task::JoinHandle; #[cfg(feature = "metrics")] use std::collections::HashMap; @@ -477,7 +476,7 @@ impl BftCallback for Consensus { // Try to advance to the next block. let self_ = self.clone(); let transmissions_ = transmissions.clone(); - let result = spawn_blocking! { self_.try_advance_to_next_block(subdag, transmissions_) }; + let result = task::spawn_blocking(move || self_.try_advance_to_next_block(subdag, transmissions_)).await; // If the block failed to advance, reinsert the transmissions into the memory pool. 
if result.is_err() { @@ -616,7 +615,7 @@ impl Consensus { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the consensus and BFT layers. diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index ffc0041599..98601b0481 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -43,6 +43,7 @@ use snarkvm::{ store::ConsensusStorage, }, prelude::{VM, block::Transaction}, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -67,10 +68,7 @@ use std::{ }, time::{Duration, Instant}, }; -use tokio::{ - task::JoinHandle, - time::{sleep, timeout}, -}; +use tokio::time::{sleep, timeout}; /// The maximum number of solutions to verify in parallel. /// Note: worst case memory to verify a solution is 0.5 GiB. @@ -227,8 +225,6 @@ impl> Client { node.initialize_deploy_verification(); // Initialize execution verification. node.initialize_execute_verification(); - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); // Pass the node to the signal handler. let _ = signal_node.set(node.clone()); // Return the node. @@ -261,7 +257,7 @@ impl> Client { let _self = self.clone(); let mut last_update = Instant::now(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. if _self.shutdown.load(std::sync::atomic::Ordering::Acquire) { @@ -282,7 +278,7 @@ impl> Client { _self.try_block_sync().await; last_update = now; } - })); + }); } /// Client-side version of `snarkos_node_bft::Sync::try_block_sync()`. @@ -360,7 +356,7 @@ impl> Client { } // Sleep to avoid triggering spam detection. 
- tokio::time::sleep(BLOCK_REQUEST_BATCH_DELAY).await; + sleep(BLOCK_REQUEST_BATCH_DELAY).await; } } @@ -368,7 +364,7 @@ impl> Client { fn initialize_solution_verification(&self) { // Start the solution verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. if node.shutdown.load(Acquire) { @@ -394,7 +390,7 @@ impl> Client { let previous_counter = node.num_verifying_solutions.fetch_add(1, Relaxed); let _node = node.clone(); // For each solution, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Retrieve the latest epoch hash. if let Ok(epoch_hash) = _node.ledger.latest_epoch_hash() { // Check if the prover has reached their solution limit. @@ -435,14 +431,14 @@ impl> Client { } } } - })); + }); } /// Initializes deploy verification. fn initialize_deploy_verification(&self) { // Start the deploy verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. if node.shutdown.load(Acquire) { @@ -468,7 +464,7 @@ impl> Client { let previous_counter = node.num_verifying_deploys.fetch_add(1, Relaxed); let _node = node.clone(); // For each deployment, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Check the deployment. match _node.ledger.check_transaction_basic(&transaction, None, &mut rand::thread_rng()) { Ok(_) => { @@ -488,14 +484,14 @@ impl> Client { } } } - })); + }); } /// Initializes execute verification. fn initialize_execute_verification(&self) { // Start the execute verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. 
if node.shutdown.load(Acquire) { @@ -521,7 +517,7 @@ impl> Client { let previous_counter = node.num_verifying_executions.fetch_add(1, Relaxed); let _node = node.clone(); // For each execution, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Check the execution. match _node.ledger.check_transaction_basic(&transaction, None, &mut rand::thread_rng()) { Ok(_) => { @@ -541,12 +537,12 @@ impl> Client { } } } - })); + }); } /// Spawns a task with the given future; it should only be used for long-running tasks. pub fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } } diff --git a/node/src/lib.rs b/node/src/lib.rs index 3ac2106561..a162a12218 100644 --- a/node/src/lib.rs +++ b/node/src/lib.rs @@ -63,74 +63,3 @@ pub fn log_clean_error(storage_mode: &StorageMode) { } } } - -/// Starts the notification message loop. -pub fn start_notification_message_loop() -> tokio::task::JoinHandle<()> { - // let mut interval = tokio::time::interval(std::time::Duration::from_secs(180)); - tokio::spawn(async move { - // loop { - // interval.tick().await; - // // TODO (howardwu): Swap this with the official message for announcements. - // // info!("{}", notification_message()); - // } - }) -} - -/// Returns the notification message as a string. -pub fn notification_message() -> String { - use colored::Colorize; - - let mut output = String::new(); - output += &r#" - - ================================================================================================== - - 🚧 Welcome to Aleo - Calibration Period 🚧 - - ================================================================================================== - - During the calibration period, the network will be running in limited capacity. - - This calibration period is to ensure validators are stable and ready for mainnet launch. 
- During this period, the objective is to assess, adjust, and align validators' performance, - stability, and interoperability under varying network conditions. - - Please expect several network resets. With each network reset, software updates will - be performed to address potential bottlenecks, vulnerabilities, and/or inefficiencies, which - will ensure optimal performance for the ecosystem of validators, provers, and developers. - - ================================================================================================== - - Duration: - - Start Date: September 27, 2023 - - End Date: October 18, 2023 (subject to change) - - Participation: - - Node operators are NOT REQUIRED to participate during this calibration period. - - Network Resets: - - IMPORTANT: EXPECT MULTIPLE NETWORK RESETS. - - If participating, BE PREPARED TO RESET YOUR NODE AT ANY TIME. - - When a reset occurs, RUN THE FOLLOWING TO RESET YOUR NODE: - - git checkout mainnet && git pull - - cargo install --locked --path . - - snarkos clean - - snarkos start --nodisplay --client - - Communication: - - Stay ONLINE and MONITOR our Discord and Twitter for community updates. - - Purpose: - - This period is STRICTLY FOR NETWORK CALIBRATION. - - This period is NOT INTENDED for general-purpose usage by developers and provers. - - Incentives: - - There are NO INCENTIVES during this calibration period. 
- - ================================================================================================== -"# - .white() - .bold(); - - output -} diff --git a/node/src/prover/mod.rs b/node/src/prover/mod.rs index 44fe46e827..b86abe01c8 100644 --- a/node/src/prover/mod.rs +++ b/node/src/prover/mod.rs @@ -40,6 +40,7 @@ use snarkvm::{ store::ConsensusStorage, }, synthesizer::VM, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -59,7 +60,6 @@ use std::{ atomic::{AtomicBool, AtomicU8, Ordering}, }, }; -use tokio::task::JoinHandle; /// A prover is a light node, capable of producing proofs for consensus. #[derive(Clone)] @@ -153,8 +153,6 @@ impl> Prover { node.initialize_routing().await; // Initialize the puzzle. node.initialize_puzzle().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); // Pass the node to the signal handler. let _ = signal_node.set(node.clone()); // Return the node. @@ -192,7 +190,7 @@ impl> Prover { async fn initialize_puzzle(&self) { for _ in 0..self.max_puzzle_instances { let prover = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.handles.lock().push(task::spawn(async move { prover.puzzle_loop().await; })); } @@ -228,13 +226,13 @@ impl> Prover { if let (Some(epoch_hash), Some((coinbase_target, proof_target))) = (latest_epoch_hash, latest_state) { // Execute the puzzle. let prover = self.clone(); - let result = tokio::task::spawn_blocking(move || { + let result = task::spawn_blocking(move || { prover.puzzle_iteration(epoch_hash, coinbase_target, proof_target, &mut OsRng) }) .await; // If the prover found a solution, then broadcast it. - if let Ok(Some((solution_target, solution))) = result { + if let Some((solution_target, solution)) = result { info!("Found a Solution '{}' (Proof Target {solution_target})", solution.id()); // Broadcast the solution. 
self.broadcast_solution(solution); diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 9bc6b98dcb..4f53c71ba7 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -18,7 +18,7 @@ mod router; use crate::traits::NodeInterface; use snarkos_account::Account; -use snarkos_node_bft::{ledger_service::CoreLedgerService, spawn_blocking}; +use snarkos_node_bft::ledger_service::CoreLedgerService; use snarkos_node_cdn::CdnBlockSync; use snarkos_node_consensus::Consensus; use snarkos_node_rest::Rest; @@ -35,12 +35,15 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; -use snarkvm::prelude::{ - Ledger, - Network, - block::{Block, Header}, - puzzle::Solution, - store::ConsensusStorage, +use snarkvm::{ + prelude::{ + Ledger, + Network, + block::{Block, Header}, + puzzle::Solution, + store::ConsensusStorage, + }, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -55,7 +58,6 @@ use std::{ sync::{Arc, atomic::AtomicBool}, time::Duration, }; -use tokio::task::JoinHandle; /// A validator is a full node, capable of validating blocks. #[derive(Clone)] @@ -186,8 +188,6 @@ impl> Validator { // Initialize the routing. node.initialize_routing().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); // Pass the node to the signal handler. let _ = signal_node.set(node.clone()); // Return the node. @@ -416,15 +416,19 @@ impl> Validator { let inputs = [Value::from(Literal::Address(self_.address())), Value::from(Literal::U64(U64::new(1)))]; // Execute the transaction. 
let self__ = self_.clone(); - let transaction = match spawn_blocking!(self__.ledger.vm().execute( - self__.private_key(), - locator, - inputs.into_iter(), - None, - 10_000, - None, - &mut rand::thread_rng(), - )) { + let transaction = match task::spawn_blocking(move || { + self__.ledger.vm().execute( + self__.private_key(), + locator, + inputs.into_iter(), + None, + 10_000, + None, + &mut rand::thread_rng(), + ) + }) + .await + { Ok(transaction) => transaction, Err(error) => { error!("Transaction pool encountered an execution error - {error}"); @@ -449,7 +453,7 @@ impl> Validator { /// Spawns a task with the given future; it should only be used for long-running tasks. pub fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } } From 8525aa2dd4075f0dab6dfdda02a7ef9f8be59ea3 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Thu, 11 Sep 2025 13:45:45 -0700 Subject: [PATCH 09/16] chore: update sppark, itertools, and mockall dependencies --- Cargo.lock | 62 +++++++++++++++++++-------------------------- node/bft/Cargo.toml | 4 +-- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d247170215..a9494b826f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1786,7 +1786,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.0", "system-configuration", "tokio", "tower-service", @@ -2039,15 +2039,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2084,9 +2075,9 @@ dependencies = [ [[package]] name = "js-sys" 
-version = "0.3.79" +version = "0.3.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6247da8b8658ad4e73a186e747fcc5fc2a29f979d6fe6269127fdb5fd08298d0" +checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e" dependencies = [ "once_cell", "wasm-bindgen", @@ -2402,14 +2393,13 @@ dependencies = [ [[package]] name = "mockall" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43766c2b5203b10de348ffe19f7e54564b64f3d6018ff7648d1e2d6d3a0f0a48" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" dependencies = [ "cfg-if", "downcast", "fragile", - "lazy_static", "mockall_derive", "predicates", "predicates-tree", @@ -2417,9 +2407,9 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cbce79ec385a1d4f54baa90a76401eb15d9cab93685f62e7e9f942aa00ae2" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" dependencies = [ "cfg-if", "proc-macro2", @@ -2913,7 +2903,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.5.10", + "socket2 0.6.0", "thiserror 2.0.16", "tokio", "tracing", @@ -2950,7 +2940,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.0", "tracing", "windows-sys 0.60.2", ] @@ -3865,7 +3855,7 @@ dependencies = [ "deadline", "futures", "indexmap 2.11.3", - "itertools 0.12.1", + "itertools 0.14.0", "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "mockall", @@ -5510,9 +5500,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +checksum = 
"05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" dependencies = [ "rustls", "tokio", @@ -5995,9 +5985,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad224d2776649cfb4f4471124f8176e54c1cca67a88108e30a0cd98b90e7ad3" +checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819" dependencies = [ "cfg-if", "once_cell", @@ -6008,9 +5998,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1364104bdcd3c03f22b16a3b1c9620891469f5e9f09bc38b2db121e593e732" +checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c" dependencies = [ "bumpalo", "log", @@ -6022,9 +6012,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.52" +version = "0.4.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c0a08ecf5d99d5604a6666a70b3cde6ab7cc6142f5e641a8ef48fc744ce8854" +checksum = "a0b221ff421256839509adbb55998214a70d829d3a28c69b4a6672e9d2a42f67" dependencies = [ "cfg-if", "js-sys", @@ -6035,9 +6025,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d7ab4ca3e367bb1ed84ddbd83cc6e41e115f8337ed047239578210214e36c76" +checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0" dependencies = [ "quote 1.0.40", "wasm-bindgen-macro-support", @@ -6045,9 +6035,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a518014843a19e2dbbd0ed5dfb6b99b23fb886b14e6192a00803a3e14c552b0" +checksum = 
"ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -6058,18 +6048,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "255eb0aa4cc2eea3662a00c2bbd66e93911b7361d5e0fcd62385acfd7e15dcee" +checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.79" +version = "0.3.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50462a022f46851b81d5441d1a6f5bac0b21a1d72d64bd4906fbdd4bf7230ec7" +checksum = "fbe734895e869dc429d78c4b433f8d17d95f8d05317440b4fad5ab2d33e596dc" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 12babc736c..b9e971d3f5 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -157,7 +157,7 @@ features = [ "derive" ] version = "0.2" [dev-dependencies.itertools] -version = "0.12" +version = "0.14" [dev-dependencies.open] version = "5" @@ -203,7 +203,7 @@ features = [ "env-filter" ] workspace = true [dev-dependencies.mockall] -version = "0.12.1" +version = "0.13" [[test]] name = "gateway-e2e" From 6066291d4223ce27fbd573af2701f71264f6db20 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 13:17:41 -0700 Subject: [PATCH 10/16] misc(node): use new error logging mechanisms in snarkVM --- node/bft/src/bft.rs | 47 +++++++++++--------- node/bft/src/gateway.rs | 22 +++++---- node/bft/src/helpers/channels.rs | 7 ++- node/bft/src/lib.rsbeGfet.bck | 76 ++++++++++++++++++++++++++++++++ node/bft/src/primary.rs | 2 +- node/bft/src/sync/mod.rs | 40 +++++++++-------- node/bft/src/worker.rs | 3 +- node/bft/tests/gateway_e2e.rs | 2 +- node/sync/Cargo.toml | 2 +- node/sync/src/block_sync.rs | 30 ++++++------- 10 files changed, 163 insertions(+), 68 deletions(-) create mode 100644 
node/bft/src/lib.rsbeGfet.bck diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index a059ec9f10..c0bafe48b9 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -31,9 +31,11 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::{Field, Network, Result, bail, ensure}, + utilities::LoggableError, }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -256,8 +258,8 @@ impl PrimaryCallback for BFT { // If the BFT is ready, then update to the next round. if is_ready { // Update to the next round in storage. - if let Err(e) = self.storage().increment_to_next_round(current_round) { - warn!("BFT failed to increment to the next round from round {current_round} - {e}"); + if let Err(err) = self.storage().increment_to_next_round(current_round) { + err.log_warning(format!("BFT failed to increment to the next round from round {current_round}")); return false; } // Update the timer for the leader certificate. @@ -334,8 +336,10 @@ impl BFT { // Retrieve the committee lookback of the current round. let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the even round {current_round}" + )); return false; } }; @@ -346,8 +350,8 @@ impl BFT { // Compute the leader for the current round. let computed_leader = match committee_lookback.get_leader(current_round) { Ok(leader) => leader, - Err(e) => { - error!("BFT failed to compute the leader for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!("BFT failed to compute the leader for the even round {current_round}")); return false; } }; @@ -425,8 +429,10 @@ impl BFT { // Retrieve the committee lookback for the current round. 
let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the odd round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the odd round {current_round}" + )); return false; } }; @@ -520,7 +526,7 @@ impl BFT { // Retrieve the committee lookback for the commit round. let Ok(committee_lookback) = self.ledger().get_committee_lookback_for_round(commit_round) else { - bail!("BFT failed to retrieve the committee with lag for commit round {commit_round}"); + bail!("BFT failed to retrieve the committee lookback for commit round {commit_round}"); }; // Either retrieve the cached leader or compute it. @@ -595,23 +601,19 @@ impl BFT { for round in (self.dag.read().last_committed_round() + 2..=leader_round.saturating_sub(2)).rev().step_by(2) { // Retrieve the previous committee for the leader round. - let previous_committee_lookback = match self.ledger().get_committee_lookback_for_round(round) { - Ok(committee) => committee, - Err(e) => { - bail!("BFT failed to retrieve a previous committee lookback for the even round {round} - {e}"); - } - }; + let previous_committee_lookback = + self.ledger().get_committee_lookback_for_round(round).with_context(|| { + format!("BFT failed to retrieve a previous committee lookback for the even round {round}") + })?; + // Either retrieve the cached leader or compute it. let leader = match self.ledger().latest_leader() { Some((cached_round, cached_leader)) if cached_round == round => cached_leader, _ => { // Compute the leader for the commit round. 
- let computed_leader = match previous_committee_lookback.get_leader(round) { - Ok(leader) => leader, - Err(e) => { - bail!("BFT failed to compute the leader for the even round {round} - {e}"); - } - }; + let computed_leader = previous_committee_lookback + .get_leader(round) + .with_context(|| format!("BFT failed to compute the leader for the even round {round}"))?; // Cache the computed leader. self.ledger().update_latest_leader(round, computed_leader); @@ -727,8 +729,9 @@ impl BFT { if let Some(cb) = self.bft_callback.get() { // Send the subdag and transmissions to consensus. if let Err(err) = cb.process_bft_subdag(subdag, transmissions).await { - error!("BFT failed to advance the subdag for round {anchor_round}: {err:?}"); + err.log_error("BFT failed to advance the subdag for round {anchor_round}"); return Ok(()); + } } diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index d8d0abfa24..f65cf448de 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -60,9 +60,13 @@ use snarkvm::{ narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, - utilities::task::{self, JoinHandle}, + utilities::{ + LoggableError, + task::{self, JoinHandle}, + }, }; +use anyhow::Context; use colored::Colorize; use futures::SinkExt; use indexmap::{IndexMap, IndexSet}; @@ -238,15 +242,15 @@ impl Gateway { worker_senders: IndexMap>, primary_callback: Arc>, sync_callback: Option>>, - ) { + ) -> Result<()> { debug!("Starting the gateway for the memory pool..."); - self.worker_senders.set(worker_senders).expect("The worker senders are already set"); + self.worker_senders.set(worker_senders).with_context(|| "The worker senders are already set")?; - self.primary_callback.set(primary_callback).expect("The primary callback is already set"); + self.primary_callback.set(primary_callback)?; if let Some(sync_callback) = sync_callback { - self.sync_callback.set(sync_callback).unwrap(); + self.sync_callback.set(sync_callback)?; } // Enable the TCP 
protocols. @@ -257,13 +261,15 @@ impl Gateway { self.enable_on_connect().await; // Enable the TCP listener. Note: This must be called after the above protocols. - let listen_addr = self.tcp.enable_listener().await.expect("Failed to enable the TCP listener"); + let listen_addr = self.tcp.enable_listener().await.with_context(|| "Failed to enable the TCP listener")?; debug!("Listening for validator connections at address {listen_addr:?}"); // Initialize the heartbeat. self.initialize_heartbeat(); info!("Started the gateway for the memory pool at '{}'", self.local_ip()); + + Ok(()) } } @@ -752,7 +758,7 @@ impl Gateway { blocks.ensure_response_is_well_formed(peer_ip, request.start_height, request.end_height)?; // Send the blocks to the sync module. if let Err(err) = cb.insert_block_response(peer_ip, blocks.0) { - warn!("Unable to process block response from '{peer_ip}': {err}"); + err.log_warning(format!("Unable to process block response from '{peer_ip}'")); } } Ok(()) @@ -1832,7 +1838,7 @@ mod prop_tests { (workers, tx_workers) }; - gateway.run(worker_senders, Arc::new(DummyGatewayPrimaryCallback::default()), None).await; + gateway.run(worker_senders, Arc::new(DummyGatewayPrimaryCallback::default()), None).await.unwrap(); assert_eq!( gateway.local_ip(), SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1f23e38264..3a77274881 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -14,7 +14,12 @@ // limitations under the License. 
use crate::events::{TransmissionRequest, TransmissionResponse}; -use snarkvm::{console::network::*, ledger::narwhal::TransmissionID}; +use snarkvm::{ + console::network::*, + ledger::{ + narwhal::TransmissionID, + }, +}; use std::net::SocketAddr; use tokio::sync::mpsc; diff --git a/node/bft/src/lib.rsbeGfet.bck b/node/bft/src/lib.rsbeGfet.bck new file mode 100644 index 0000000000..c1b4b5b546 --- /dev/null +++ b/node/bft/src/lib.rsbeGfet.bck @@ -0,0 +1,76 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#![forbid(unsafe_code)] +#![allow(clippy::blocks_in_conditions)] +#![allow(clippy::type_complexity)] + +#[macro_use] +extern crate async_trait; +#[macro_use] +extern crate tracing; + +#[cfg(feature = "metrics")] +extern crate snarkos_node_metrics as metrics; + +pub use snarkos_node_bft_events as events; +pub use snarkos_node_bft_ledger_service as ledger_service; +pub use snarkos_node_bft_storage_service as storage_service; + +pub mod helpers; + +mod bft; +pub use bft::{BFT, BftCallback}; + +<<<<<<< HEAD +pub mod gateway; +pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; +======= +mod gateway; +pub use gateway::Gateway; +>>>>>>> 1bddd475a (misc(bft): get rid of bft channels) + +mod primary; +pub use primary::*; + +mod sync; +pub use sync::*; + +mod worker; +pub use worker::*; + +pub const CONTEXT: &str = "[MemoryPool]"; + +/// The port on which the memory pool listens for incoming connections. +pub const MEMORY_POOL_PORT: u16 = 5000; // port + +/// The maximum number of milliseconds to wait before proposing a batch. +pub const MAX_BATCH_DELAY_IN_MS: u64 = 2500; // ms +/// The minimum number of seconds to wait before proposing a batch. +pub const MIN_BATCH_DELAY_IN_SECS: u64 = 1; // seconds +/// The maximum number of milliseconds to wait before timing out on a fetch. +pub const MAX_FETCH_TIMEOUT_IN_MS: u64 = 3 * MAX_BATCH_DELAY_IN_MS; // ms +/// The maximum number of seconds allowed for the leader to send their certificate. +pub const MAX_LEADER_CERTIFICATE_DELAY_IN_SECS: i64 = 2 * MAX_BATCH_DELAY_IN_MS as i64 / 1000; // seconds +/// The maximum number of seconds before the timestamp is considered expired. +pub const MAX_TIMESTAMP_DELTA_IN_SECS: i64 = 10; // seconds +/// The maximum number of workers that can be spawned. +pub const MAX_WORKERS: u8 = 1; // worker(s) + +/// The interval at which each primary broadcasts a ping to every other node. +/// Note: If this is updated, be sure to update `MAX_BLOCKS_BEHIND` to correspond properly. 
+pub const PRIMARY_PING_IN_MS: u64 = 2 * MAX_BATCH_DELAY_IN_MS; // ms +/// The interval at which each worker broadcasts a ping to every other node. +pub const WORKER_PING_IN_MS: u64 = 4 * MAX_BATCH_DELAY_IN_MS; // ms diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index e37074f7fb..01695d1e30 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -185,7 +185,7 @@ impl Primary { // Next, initialize the gateway. let gateway_primary_callback = Arc::new(obj.clone()) as Arc>; let gateway_sync_callback = Arc::new(obj.sync.clone()); - obj.gateway.run(worker_senders, gateway_primary_callback, Some(gateway_sync_callback)).await; + obj.gateway.run(worker_senders, gateway_primary_callback, Some(gateway_sync_callback)).await?; Ok(obj) } diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 7c4ca78fcb..3853b9384e 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -17,24 +17,28 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, events::DataBlocks, - gateway::{Gateway, GatewaySyncCallback, Transport}, - helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, + helpers::CallbackHandle, + gateway::{Gateway, Transport, GatewaySyncCallback}, + helpers::{Pending, Storage, fmt_id, max_redundant_requests}, + events::{CertificateRequest, CertificateResponse, Event}, + ledger_service::LedgerService, }; - -use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncRequest, locators::BlockLocators}; use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, prelude::{cfg_into_iter, cfg_iter}, - utilities::spawn_blocking, + utilities::{LoggableError, spawn_blocking, task}, }; use anyhow::{Context, Result, anyhow, bail}; use indexmap::IndexMap; #[cfg(feature = "locktick")] 
-use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; +use locktick::{ + parking_lot::Mutex, + tokio::Mutex as TMutex, +}; #[cfg(not(feature = "locktick"))] use parking_lot::Mutex; #[cfg(not(feature = "serial"))] @@ -203,7 +207,7 @@ impl Sync { if let Some(ping) = &ping { match self_.get_block_locators() { Ok(locators) => ping.update_block_locators(locators), - Err(err) => error!("Failed to update block locators: {err}"), + Err(err) => err.log_error("Failed to update block locators"), } } } @@ -264,7 +268,7 @@ impl Sync { match self.try_advancing_block_synchronization().await { Ok(new_blocks) => new_blocks, Err(err) => { - error!("Block synchronization failed - {err}"); + err.log_error("Block synchronization failed"); false } } @@ -403,7 +407,7 @@ impl Sync { // If a callback was provided, send the certificates to it. if let Some(cb) = self.sync_callback.get() { cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; - } + } self.block_sync.set_sync_height(block_height); @@ -550,7 +554,7 @@ impl Sync { if within_gc { info!("Finished catching up with the network. Switching back to BFT sync."); if let Err(err) = self.sync_storage_with_ledger_at_bootup().await { - error!("BFT sync (with bootup routine) failed - {err}"); + err.log_error("BFT sync (with bootup routine) failed"); } } @@ -566,7 +570,7 @@ impl Sync { let _lock = self.sync_lock.lock().await; let self_ = self.clone(); - spawn_blocking(move || { + task::spawn_blocking(move || { // Check the next block. self_.ledger.check_next_block(&block)?; // Attempt to advance to the next block. @@ -856,12 +860,12 @@ impl Sync { } // Wait for the certificate to be fetched. // TODO (raychu86): Consider making the timeout dynamic based on network traffic and/or the number of validators. - match tokio::time::timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await { - // If the certificate was fetched, return it. 
- Ok(result) => Ok(result?), - // If the certificate was not fetched, return an error. - Err(e) => bail!("Unable to fetch certificate {} - (timeout) {e}", fmt_id(certificate_id)), - } + let cert = timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver) + .await + .with_context(|| format!("Unable to fetch certificate {} (timeout)", fmt_id(certificate_id)))? + .with_context(|| format!("Unable to fetch certificate {} (timeout)", fmt_id(certificate_id)))?; + + Ok(cert) } } diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index f1b95402da..27defe69f5 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -20,8 +20,9 @@ use crate::{ events::{Event, TransmissionRequest, TransmissionResponse}, gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, + ledger_service::LedgerService }; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkvm::{ console::prelude::*, ledger::{ diff --git a/node/bft/tests/gateway_e2e.rs b/node/bft/tests/gateway_e2e.rs index 74cdf4896a..0d0211e842 100644 --- a/node/bft/tests/gateway_e2e.rs +++ b/node/bft/tests/gateway_e2e.rs @@ -44,7 +44,7 @@ async fn new_test_gateway( let gateway = sample_gateway(accounts[0].clone(), storage, ledger); // Set up primary channels, we discard the rx as we're testing the gateway sans BFT. 
- gateway.run([].into(), Arc::new(DummyGatewayPrimaryCallback::default()), None).await; + gateway.run([].into(), Arc::new(DummyGatewayPrimaryCallback::default()), None).await.unwrap(); (accounts, gateway) } diff --git a/node/sync/Cargo.toml b/node/sync/Cargo.toml index a759cc33db..a16fb2afbd 100644 --- a/node/sync/Cargo.toml +++ b/node/sync/Cargo.toml @@ -43,7 +43,7 @@ workspace = true [dependencies.locktick] workspace = true -features = [ "parking_lot" ] +features = [ "parking_lot", "tokio" ] optional = true [dependencies.parking_lot] diff --git a/node/sync/src/block_sync.rs b/node/sync/src/block_sync.rs index c372a758d9..978be96ff2 100644 --- a/node/sync/src/block_sync.rs +++ b/node/sync/src/block_sync.rs @@ -16,12 +16,12 @@ use crate::{ helpers::{PeerPair, PrepareSyncRequest, SyncRequest}, locators::BlockLocators, +locators::{CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}, +communication_service::CommunicationService, }; use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_router::messages::DataBlocks; -use snarkos_node_sync_communication_service::CommunicationService; -use snarkos_node_sync_locators::{CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}; -use snarkvm::prelude::{Network, block::Block}; +use snarkvm::{console::network::Network, ledger::Block, utilities::LoggableError}; use anyhow::{Result, bail, ensure}; use indexmap::{IndexMap, IndexSet}; @@ -360,8 +360,8 @@ impl BlockSync { // Insert the chunk of block requests. for (height, (hash, previous_hash, _)) in requests.iter() { // Insert the block request into the sync pool using the sync IPs from the last block request in the chunk. 
- if let Err(error) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { - warn!("Block sync failed - {error}"); + if let Err(err) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { + err.log_error("Block sync failed"); return false; } } @@ -380,7 +380,7 @@ impl BlockSync { match sender { Some(sender) => { if let Err(err) = sender.await { - warn!("Failed to send block request to peer '{sync_ip}': {err}"); + err.log_warning(format!("Failed to send block request to peer '{sync_ip}'")); false } else { true @@ -401,7 +401,7 @@ impl BlockSync { let success = match result { Ok(success) => success, Err(err) => { - error!("tokio join error: {err}"); + err.log_error("tokio join error"); false } }; @@ -432,7 +432,7 @@ impl BlockSync { for block in blocks { if let Err(error) = self.insert_block_response(peer_ip, block) { self.remove_block_requests_to_peer(&peer_ip); - bail!("{error}"); + return Err(error); } } Ok(()) @@ -509,20 +509,20 @@ impl BlockSync { Ok(_) => match ledger.advance_to_next_block(&block) { Ok(_) => true, Err(err) => { - warn!( - "Failed to advance to next block (height: {}, hash: '{}'): {err}", + err.log_warning(format!( + "Failed to advance to next block (height: {}, hash: '{}')", block.height(), block.hash() - ); + )); false } }, Err(err) => { - warn!( - "The next block (height: {}, hash: '{}') is invalid - {err}", + err.log_warning(format!( + "The next block (height: {}, hash: '{}') is invalid", block.height(), block.hash() - ); + )); false } } @@ -1288,9 +1288,9 @@ mod tests { NUM_RECENT_BLOCKS, test_helpers::{sample_block_locators, sample_block_locators_with_fork}, }; + use crate::communication_service::test_helpers::DummyCommunicationService; use snarkos_node_bft_ledger_service::MockLedgerService; - use snarkos_node_sync_communication_service::test_helpers::DummyCommunicationService; use snarkvm::{ ledger::committee::Committee, prelude::{Field, TestRng}, From 
ce33e8a2374ffbe2d54468644998cf70df6be4e7 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 16:53:27 -0700 Subject: [PATCH 11/16] refactor(bft): reduce visibility for bft::Sync --- node/bft/src/lib.rs | 1 - node/bft/src/{sync/mod.rs => sync.rs} | 5 ----- 2 files changed, 6 deletions(-) rename node/bft/src/{sync/mod.rs => sync.rs} (99%) diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index 19aaab9c6b..d715971540 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -41,7 +41,6 @@ mod primary; pub use primary::*; mod sync; -pub use sync::*; mod worker; pub use worker::*; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync.rs similarity index 99% rename from node/bft/src/sync/mod.rs rename to node/bft/src/sync.rs index 3853b9384e..7b434f86fd 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync.rs @@ -812,11 +812,6 @@ impl Sync { self.block_sync.is_block_synced() } - /// Returns the number of blocks the node is behind the greatest peer height. - pub fn num_blocks_behind(&self) -> Option { - self.block_sync.num_blocks_behind() - } - /// Returns the current block locators of the node. 
pub fn get_block_locators(&self) -> Result> { self.block_sync.get_block_locators() From acf149459019c8d2a970bc264eba94411cf9642a Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 17:08:29 -0700 Subject: [PATCH 12/16] refactor(bft): reduce public API --- node/bft/src/helpers/mod.rs | 10 +++++----- node/bft/src/helpers/ready.rs | 1 + node/bft/src/lib.rs | 8 ++++---- node/bft/src/worker.rs | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 756e501089..47945f3fba 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -34,16 +34,16 @@ pub use proposal::*; pub mod proposal_cache; pub use proposal_cache::*; -pub mod ready; -pub use ready::*; +mod ready; +pub(crate) use ready::*; -pub mod resolver; -pub use resolver::*; +mod resolver; +pub(crate) use resolver::*; pub mod signed_proposals; pub use signed_proposals::*; -pub mod storage; +mod storage; pub use storage::*; #[cfg(feature = "telemetry")] diff --git a/node/bft/src/helpers/ready.rs b/node/bft/src/helpers/ready.rs index b69d546aef..b0d04ed17e 100644 --- a/node/bft/src/helpers/ready.rs +++ b/node/bft/src/helpers/ready.rs @@ -52,6 +52,7 @@ impl Ready { } /// Returns `true` if the ready queue is empty. 
+ #[cfg(test)] pub fn is_empty(&self) -> bool { self.transmissions.is_empty() } diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index d715971540..e1ae90dbc8 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -37,13 +37,13 @@ pub use bft::{BFT, BftCallback}; pub mod gateway; pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; -mod primary; -pub use primary::*; - mod sync; +mod primary; +pub use primary::{Primary, PrimaryCallback}; + mod worker; -pub use worker::*; +pub use worker::Worker; pub const CONTEXT: &str = "[MemoryPool]"; diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 27defe69f5..24c2c0e844 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -16,11 +16,11 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, MAX_WORKERS, - ProposedBatch, events::{Event, TransmissionRequest, TransmissionResponse}, gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, - ledger_service::LedgerService + ledger_service::LedgerService, + primary::ProposedBatch, }; use snarkvm::{ From fc7f3b85fcbc7188ca3e19848015be3426c8dcc9 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 17:10:41 -0700 Subject: [PATCH 13/16] refactor(bft): move gateway-specific helpers to gateway submodule --- node/bft/src/gateway.rs | 8 +++++++- node/bft/src/{helpers => gateway}/cache.rs | 8 +------- node/bft/src/{helpers => gateway}/resolver.rs | 0 node/bft/src/helpers/mod.rs | 6 ------ 4 files changed, 8 insertions(+), 14 deletions(-) rename node/bft/src/{helpers => gateway}/cache.rs (99%) rename node/bft/src/{helpers => gateway}/resolver.rs (100%) diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index f65cf448de..536631de88 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -13,6 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+mod resolver; +use resolver::Resolver; + +mod cache; +use cache::Cache; + #[cfg(feature = "telemetry")] use crate::helpers::Telemetry; use crate::{ @@ -21,7 +27,7 @@ use crate::{ MEMORY_POOL_PORT, Worker, events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, - helpers::{Cache, CallbackHandle, Resolver, Storage, WorkerSender, assign_to_worker}, + helpers::{CallbackHandle, Storage, WorkerSender, assign_to_worker}, }; use snarkos_account::Account; use snarkos_node_bft_events::{ diff --git a/node/bft/src/helpers/cache.rs b/node/bft/src/gateway/cache.rs similarity index 99% rename from node/bft/src/helpers/cache.rs rename to node/bft/src/gateway/cache.rs index 377bdc724e..fca474bba0 100644 --- a/node/bft/src/helpers/cache.rs +++ b/node/bft/src/gateway/cache.rs @@ -54,13 +54,7 @@ pub struct Cache { impl Default for Cache { /// Initializes a new instance of the cache. fn default() -> Self { - Self::new() - } -} - -impl Cache { - /// Initializes a new instance of the cache. - pub fn new() -> Self { + // This needs to be manually implemented as `Network` does not implement `Default`. Self { seen_inbound_connections: Default::default(), seen_inbound_events: Default::default(), diff --git a/node/bft/src/helpers/resolver.rs b/node/bft/src/gateway/resolver.rs similarity index 100% rename from node/bft/src/helpers/resolver.rs rename to node/bft/src/gateway/resolver.rs diff --git a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 47945f3fba..16a7d70fa9 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-pub mod cache; -pub use cache::*; - pub mod channels; pub use channels::*; @@ -37,9 +34,6 @@ pub use proposal_cache::*; mod ready; pub(crate) use ready::*; -mod resolver; -pub(crate) use resolver::*; - pub mod signed_proposals; pub use signed_proposals::*; From 37d590bcb46f34f3517063ee5d318b2905191c20 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 17:29:22 -0700 Subject: [PATCH 14/16] misc(bft): use snarkvm::utilities::task everywhere --- Cargo.lock | 132 +++++++++++++--------------- node/bft/src/bft.rs | 3 +- node/bft/src/helpers/channels.rs | 7 +- node/bft/src/primary.rs | 51 ++++++----- node/bft/src/sync.rs | 31 ++++--- node/bft/src/worker.rs | 26 ++++-- node/bft/tests/components/worker.rs | 17 ++-- node/sync/src/block_sync.rs | 17 ++-- 8 files changed, 139 insertions(+), 145 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a9494b826f..9eeaa6a9c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2516,16 +2516,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "num-format" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" -dependencies = [ - "arrayvec", - "itoa", -] - [[package]] name = "num-integer" version = "0.1.46" @@ -4139,12 +4129,10 @@ dependencies = [ [[package]] name = "snarkvm" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "anstyle", "anyhow", "dotenvy", - "num-format", "rand 0.8.5", "serde_json", "snarkvm-algorithms", @@ -4164,7 +4152,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4192,7 +4180,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms-cuda" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "blst", "cc", @@ -4203,7 +4191,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4217,7 +4205,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-network", "snarkvm-circuit-types", @@ -4227,7 +4215,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-types", "snarkvm-console-algorithms", @@ -4237,7 +4225,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-collections" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-types", @@ -4247,7 +4235,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "indexmap 2.11.3", "itertools 0.14.0", @@ -4265,12 +4253,12 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment-witness" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" [[package]] name = "snarkvm-circuit-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-collections", @@ -4281,7 +4269,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4295,7 +4283,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types" version = 
"4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-address", @@ -4310,7 +4298,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4323,7 +4311,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-console-types-boolean", @@ -4332,7 +4320,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4342,7 +4330,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4354,7 +4342,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4366,7 +4354,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4377,7 +4365,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4389,7 +4377,7 @@ dependencies = [ [[package]] name = "snarkvm-console" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-account", "snarkvm-console-algorithms", @@ -4402,7 +4390,7 @@ dependencies = [ [[package]] name = 
"snarkvm-console-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bs58", "snarkvm-console-network", @@ -4413,7 +4401,7 @@ dependencies = [ [[package]] name = "snarkvm-console-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "blake2s_simd", "smallvec", @@ -4426,7 +4414,7 @@ dependencies = [ [[package]] name = "snarkvm-console-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "rayon", @@ -4437,7 +4425,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "enum-iterator", @@ -4457,7 +4445,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "bech32", @@ -4475,7 +4463,7 @@ dependencies = [ [[package]] name = 
"snarkvm-console-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "enum-iterator", "enum_index", @@ -4495,7 +4483,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-address", @@ -4510,7 +4498,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4521,7 +4509,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", ] @@ -4529,7 +4517,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" 
dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4539,7 +4527,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4550,7 +4538,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4561,7 +4549,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4572,7 +4560,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4583,7 +4571,7 @@ dependencies = [ [[package]] name = "snarkvm-curves" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "rand 0.8.5", "rayon", @@ -4597,7 +4585,7 @@ dependencies = [ [[package]] name = "snarkvm-fields" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4614,7 +4602,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4641,7 +4629,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-authority" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "rand 0.8.5", @@ -4653,7 +4641,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-block" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "indexmap 2.11.3", @@ -4675,7 +4663,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-committee" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "indexmap 2.11.3", @@ -4694,7 +4682,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-ledger-narwhal-batch-certificate", "snarkvm-ledger-narwhal-batch-header", @@ -4707,7 +4695,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-certificate" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4720,7 +4708,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-header" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4733,7 +4721,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-data" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bytes", "serde_json", @@ -4744,7 +4732,7 @@ dependencies = [ 
[[package]] name = "snarkvm-ledger-narwhal-subdag" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "indexmap 2.11.3", "rayon", @@ -4759,7 +4747,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bytes", "serde_json", @@ -4772,7 +4760,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission-id" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console", "snarkvm-ledger-puzzle", @@ -4781,7 +4769,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4801,7 +4789,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle-epoch" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4824,7 +4812,7 
@@ dependencies = [ [[package]] name = "snarkvm-ledger-query" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "async-trait", @@ -4841,7 +4829,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-store" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std-storage", "anyhow", @@ -4868,7 +4856,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-test-helpers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4886,7 +4874,7 @@ dependencies = [ [[package]] name = "snarkvm-metrics" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "metrics", ] @@ -4894,7 +4882,7 @@ dependencies = [ [[package]] name = "snarkvm-parameters" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4917,7 +4905,7 @@ dependencies = [ [[package]] name = 
"snarkvm-synthesizer" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4950,7 +4938,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-process" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "colored 3.0.0", @@ -4975,7 +4963,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "indexmap 2.11.3", "paste", @@ -4993,7 +4981,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-snark" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bincode", "serde_json", @@ -5006,7 +4994,7 @@ dependencies = [ [[package]] name = "snarkvm-utilities" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -5030,7 +5018,7 @@ dependencies = [ [[package]] name = "snarkvm-utilities-derives" 
version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#34a44e45a748d3fd3dd3cd47a561b25be4bfe984" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "proc-macro2", "quote 1.0.40", diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index c0bafe48b9..ce3d15109a 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -729,9 +729,8 @@ impl BFT { if let Some(cb) = self.bft_callback.get() { // Send the subdag and transmissions to consensus. if let Err(err) = cb.process_bft_subdag(subdag, transmissions).await { - err.log_error("BFT failed to advance the subdag for round {anchor_round}"); + err.log_error(format!("BFT failed to advance the subdag for round {anchor_round}")); return Ok(()); - } } diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 3a77274881..1f23e38264 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -14,12 +14,7 @@ // limitations under the License. use crate::events::{TransmissionRequest, TransmissionResponse}; -use snarkvm::{ - console::network::*, - ledger::{ - narwhal::TransmissionID, - }, -}; +use snarkvm::{console::network::*, ledger::narwhal::TransmissionID}; use std::net::SocketAddr; use tokio::sync::mpsc; diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 01695d1e30..8975e60f9f 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -52,7 +52,7 @@ use snarkvm::{ narwhal::{BatchCertificate, BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - utilities::spawn_blocking, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -78,7 +78,6 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::task::JoinHandle; /// A helper type for an optional proposed batch. 
pub type ProposedBatch = RwLock>>; @@ -410,7 +409,7 @@ impl Primary { // Resend the batch proposal to the validator for signing. Some(peer_ip) => { let (gateway, event_, round) = (self.gateway.clone(), event.clone(), proposal.round()); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending batch proposal for round {round} to peer '{peer_ip}'"); // Resend the batch proposal to the peer. if gateway.send(peer_ip, event_).await.is_none() { @@ -571,7 +570,7 @@ impl Primary { } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking(|| match transaction { + let transaction = task::spawn_blocking(|| match transaction { Data::Object(transaction) => Ok(transaction), Data::Buffer(bytes) => { Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) @@ -676,7 +675,7 @@ impl Primary { // Prepare the previous batch certificate IDs. let previous_certificate_ids = previous_certificates.into_iter().map(|c| c.id()).collect(); // Sign the batch header and construct the proposal. - let (batch_header, proposal) = spawn_blocking(move || { + let (batch_header, proposal) = task::spawn_blocking(move || { BatchHeader::new( &private_key, round, @@ -720,7 +719,7 @@ impl Primary { let BatchPropose { round: batch_round, batch_header } = batch_propose; // Deserialize the batch header. - let batch_header = spawn_blocking(|| batch_header.deserialize_blocking()).await?; + let batch_header = task::spawn_blocking(|| batch_header.deserialize_blocking()).await?; // Ensure the round matches in the batch header. if batch_round != batch_header.round() { // Proceed to disconnect the validator. @@ -786,7 +785,7 @@ impl Primary { // Instead, rebroadcast the cached signature to the peer. 
if signed_round == batch_header.round() && signed_batch_id == batch_header.batch_id() { let gateway = self.gateway.clone(); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending a signature for a batch in round {batch_round} from '{peer_ip}'"); let event = Event::BatchSignature(BatchSignature::new(batch_header.batch_id(), signature)); // Resend the batch signature to the peer. @@ -850,9 +849,10 @@ impl Primary { // Ensure the batch header from the peer is valid. let (storage, header) = (self.storage.clone(), batch_header.clone()); - let missing_transmissions = - spawn_blocking(move || storage.check_batch_header(&header, missing_transmissions, Default::default())) - .await?; + let missing_transmissions = task::spawn_blocking(move || { + storage.check_batch_header(&header, missing_transmissions, Default::default()) + }) + .await?; // Inserts the missing transmissions into the workers. self.insert_missing_transmissions_into_workers(peer_ip, missing_transmissions.into_iter())?; @@ -877,7 +877,7 @@ impl Primary { (transmission_id, transmission) { // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking(|| match transaction { + let transaction = task::spawn_blocking(|| match transaction { Data::Object(transaction) => Ok(transaction), Data::Buffer(bytes) => { Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) @@ -944,7 +944,7 @@ impl Primary { let batch_id = batch_header.batch_id(); // Sign the batch ID. let account = self.gateway.account().clone(); - let signature = spawn_blocking(move || account.sign(&[batch_id], &mut rand::thread_rng())).await?; + let signature = task::spawn_blocking(move || account.sign(&[batch_id], &mut rand::thread_rng())).await?; // Ensure the proposal has not already been signed. // @@ -972,7 +972,7 @@ impl Primary { // Broadcast the signature back to the validator. 
let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { let event = Event::BatchSignature(BatchSignature::new(batch_id, signature)); // Send the batch signature to the peer. if self_.gateway.send(peer_ip, event).await.is_some() { @@ -1017,7 +1017,7 @@ impl Primary { } let self_ = self.clone(); - let Some(proposal) = spawn_blocking(move || { + let Some(proposal) = task::spawn_blocking(move || { // Acquire the write lock. let mut proposed_batch = self_.proposed_batch.write(); // Add the signature to the batch, and determine if the batch is ready to be certified. @@ -1202,7 +1202,7 @@ impl Primary { // Retrieve the block locators. let self__ = self_.clone(); - let block_locators = match spawn_blocking(move || self__.sync.get_block_locators()).await { + let block_locators = match task::spawn_blocking(move || self__.sync.get_block_locators()).await { Ok(block_locators) => block_locators, Err(e) => { warn!("Failed to retrieve block locators - {e}"); @@ -1463,7 +1463,8 @@ impl Primary { let transmissions = transmissions.into_iter().collect::>(); // Store the certified batch. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking(move || storage.insert_certificate(certificate_, transmissions, Default::default())).await?; + task::spawn_blocking(move || storage.insert_certificate(certificate_, transmissions, Default::default())) + .await?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. if let Some(cb) = self.primary_callback.get() { @@ -1549,8 +1550,10 @@ impl Primary { if !self.storage.contains_certificate(certificate.id()) { // Store the batch certificate. 
let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking(move || storage.insert_certificate(certificate_, missing_transmissions, Default::default())) - .await?; + task::spawn_blocking(move || { + storage.insert_certificate(certificate_, missing_transmissions, Default::default()) + }) + .await?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. if let Some(cb) = self.primary_callback.get() { @@ -1759,7 +1762,7 @@ impl Primary { impl Primary { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. @@ -1804,9 +1807,9 @@ impl GatewayPrimaryCallback for Primary { // Spawn a task to process the primary certificate. { let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking(|| primary_certificate.deserialize_blocking()).await + let Ok(primary_certificate) = task::spawn_blocking(|| primary_certificate.deserialize_blocking()).await else { warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); return; @@ -1831,7 +1834,7 @@ impl GatewayPrimaryCallback for Primary { } // Spawn a task to process the proposed batch. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Process the batch proposal. let round = batch_propose.round; if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { @@ -1865,9 +1868,9 @@ impl GatewayPrimaryCallback for Primary { } // Spawn a task to process the batch certificate. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Deserialize the batch certificate. 
- let Ok(batch_certificate) = spawn_blocking(|| batch_certificate.deserialize_blocking()).await else { + let Ok(batch_certificate) = task::spawn_blocking(|| batch_certificate.deserialize_blocking()).await else { warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); return; }; diff --git a/node/bft/src/sync.rs b/node/bft/src/sync.rs index 7b434f86fd..70c342b131 100644 --- a/node/bft/src/sync.rs +++ b/node/bft/src/sync.rs @@ -16,29 +16,29 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, - events::DataBlocks, - helpers::CallbackHandle, - gateway::{Gateway, Transport, GatewaySyncCallback}, - helpers::{Pending, Storage, fmt_id, max_redundant_requests}, - events::{CertificateRequest, CertificateResponse, Event}, + events::{CertificateRequest, CertificateResponse, DataBlocks, Event}, + gateway::{Gateway, GatewaySyncCallback, Transport}, + helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, ledger_service::LedgerService, }; - + use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncRequest, locators::BlockLocators}; use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, - prelude::{cfg_into_iter, cfg_iter}, - utilities::{LoggableError, spawn_blocking, task}, + utilities::{ + LoggableError, + cfg_into_iter, + cfg_iter, + spawn_blocking, + task::{self, JoinHandle}, + }, }; use anyhow::{Context, Result, anyhow, bail}; use indexmap::IndexMap; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::Mutex, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] use parking_lot::Mutex; #[cfg(not(feature = "serial"))] @@ -54,7 +54,6 @@ use std::{ use tokio::sync::Mutex as TMutex; use tokio::{ sync::oneshot, - task::JoinHandle, time::{sleep, timeout}, }; @@ -304,7 +303,7 @@ impl GatewaySyncCallback for Sync { if let Some(certificate) = 
self.storage.get_certificate(request.certificate_id) { // Send the certificate response to the peer. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; }); } @@ -407,7 +406,7 @@ impl Sync { // If a callback was provided, send the certificates to it. if let Some(cb) = self.sync_callback.get() { cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; - } + } self.block_sync.set_sync_height(block_height); @@ -867,7 +866,7 @@ impl Sync { impl Sync { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 24c2c0e844..c134e0709c 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -24,16 +24,19 @@ use crate::{ }; use snarkvm::{ - console::prelude::*, + console::{network::Network, prelude::Read}, ledger::{ block::Transaction, narwhal::{BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - utilities::task::{self, JoinHandle}, + utilities::{ + FromBytes, + task::{self, JoinHandle}, + }, }; -use anyhow::Context; +use anyhow::{Context, Result, bail, ensure}; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -553,19 +556,25 @@ mod tests { use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ - console::{network::Network, types::Field}, + console::{ + network::{ConsensusVersion, Network}, + types::{Address, Field}, + }, ledger::{ block::Block, committee::Committee, narwhal::{BatchCertificate, Subdag, Transmission, TransmissionID}, 
snarkvm_ledger_test_helpers::sample_execution_transaction_with_fee, }, - prelude::Address, + prelude::{Itertools, Uniform}, + utilities::TestRng, }; + use anyhow::anyhow; use bytes::Bytes; use indexmap::IndexMap; use mockall::mock; + use rand::Rng; use std::{io, ops::Range}; type CurrentNetwork = snarkvm::prelude::MainnetV0; @@ -921,7 +930,7 @@ mod tests { for i in 1..=num_flood_requests { let worker_ = worker.clone(); let peer_ip = peer_ips.pop().unwrap(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -941,7 +950,7 @@ mod tests { // Flood the pending queue with transmission requests again, this time to a single peer for i in 1..=num_flood_requests { let worker_ = worker.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(first_peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -994,12 +1003,15 @@ mod tests { mod prop_tests { use super::*; use crate::Gateway; + use snarkos_node_bft_ledger_service::MockLedgerService; use snarkvm::{ console::account::Address, ledger::committee::{Committee, MIN_VALIDATOR_STAKE}, + prelude::TestRng, }; + use rand::Rng; use test_strategy::proptest; type CurrentNetwork = snarkvm::prelude::MainnetV0; diff --git a/node/bft/tests/components/worker.rs b/node/bft/tests/components/worker.rs index be53686ae0..4408afbf99 100644 --- a/node/bft/tests/components/worker.rs +++ b/node/bft/tests/components/worker.rs @@ -19,10 +19,7 @@ use crate::common::{ utils::{sample_ledger, sample_worker}, }; use snarkos_node_bft::helpers::max_redundant_requests; -use snarkvm::{ - ledger::narwhal::TransmissionID, - prelude::{Network, TestRng}, -}; +use snarkvm::{console::network::Network, ledger::narwhal::TransmissionID, prelude::TestRng, utilities::task}; use std::net::SocketAddr; @@ -57,7 +54,7 @@ async fn 
test_resend_transmission_request() { // Send a request to fetch the dummy transmission. let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -76,7 +73,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -95,7 +92,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let peer_ip = peer_ips.pop().unwrap(); let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -141,7 +138,7 @@ async fn test_flood_transmission_requests() { // Send the maximum number of redundant requests to fetch the dummy transmission. 
for peer_ip in remaining_peer_ips.clone() { let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); } tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -160,7 +157,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -179,7 +176,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = remaining_peer_ips.pop().unwrap(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; diff --git a/node/sync/src/block_sync.rs b/node/sync/src/block_sync.rs index 978be96ff2..5016451b05 100644 --- a/node/sync/src/block_sync.rs +++ b/node/sync/src/block_sync.rs @@ -14,10 +14,9 @@ // limitations under the License. 
use crate::{ + communication_service::CommunicationService, helpers::{PeerPair, PrepareSyncRequest, SyncRequest}, - locators::BlockLocators, -locators::{CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}, -communication_service::CommunicationService, + locators::{BlockLocators, CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}, }; use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_router::messages::DataBlocks; @@ -1283,12 +1282,14 @@ fn construct_request( #[cfg(test)] mod tests { use super::*; - use crate::locators::{ - CHECKPOINT_INTERVAL, - NUM_RECENT_BLOCKS, - test_helpers::{sample_block_locators, sample_block_locators_with_fork}, + use crate::{ + communication_service::test_helpers::DummyCommunicationService, + locators::{ + CHECKPOINT_INTERVAL, + NUM_RECENT_BLOCKS, + test_helpers::{sample_block_locators, sample_block_locators_with_fork}, + }, }; - use crate::communication_service::test_helpers::DummyCommunicationService; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkvm::{ From 62fbd9f9db0cd1e0721c8806a51e9be61b57e35b Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 15 Sep 2025 16:08:32 -0700 Subject: [PATCH 15/16] test(node): collect logs in tests --- .circleci/config.yml | 7 ++++++- Cargo.lock | 1 + node/Cargo.toml | 3 +++ node/router/Cargo.toml | 3 +++ node/router/tests/heartbeat.rs | 1 + node/tests/disconnect.rs | 2 ++ node/tests/handshake.rs | 2 ++ node/tests/peering.rs | 1 + 8 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d924f44dea..589d822b52 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -125,8 +125,13 @@ commands: - setup_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache - run: + name: "Build Tests" no_output_timeout: 30m - command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << 
parameters.flags >> --norun + - run: + name: "Run Tests" + no_output_timeout: 30m + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 RUST_BACKTRACE=1 RUST_LOG=snarkos=trace cargo test << parameters.flags >> --norun - clear_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache diff --git a/Cargo.lock b/Cargo.lock index 9eeaa6a9c7..17be62dc2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3827,6 +3827,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "tracing-test", ] [[package]] diff --git a/node/Cargo.toml b/node/Cargo.toml index 4929c51d4f..18028642a1 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -155,6 +155,9 @@ features = [ "test" ] workspace = true features = [ "env-filter", "fmt" ] +[dev-dependencies.tracing-test] +workspace = true + [dev-dependencies.rand_chacha] workspace = true diff --git a/node/router/Cargo.toml b/node/router/Cargo.toml index 0a13d9711c..5b6135464c 100644 --- a/node/router/Cargo.toml +++ b/node/router/Cargo.toml @@ -134,6 +134,9 @@ features = [ "test" ] workspace = true features = [ "test-helpers" ] +[dev-dependencies.tracing-test] +workspace = true + [dev-dependencies.tracing-subscriber] workspace = true features = [ "env-filter", "fmt" ] diff --git a/node/router/tests/heartbeat.rs b/node/router/tests/heartbeat.rs index c181c7c1c2..0e25c28f7c 100644 --- a/node/router/tests/heartbeat.rs +++ b/node/router/tests/heartbeat.rs @@ -91,6 +91,7 @@ async fn connect_to(router: &TestRouter, other: &TestRouter) { /// Checks that clients are ordered before nodes and that ordering is based on when a peer was last seen. #[tokio::test] +#[tracing_test::traced_test] async fn peer_priority_ordering() { let router = client(0, 10).await; router.enable_listener().await; diff --git a/node/tests/disconnect.rs b/node/tests/disconnect.rs index 97469e8bce..67b4462fe4 100644 --- a/node/tests/disconnect.rs +++ b/node/tests/disconnect.rs @@ -33,6 +33,7 @@ use std::time::Duration; macro_rules! 
test_disconnect { ($node_type:ident, $peer_type:ident, $node_disconnects:expr, $($attr:meta)?) => { #[tokio::test] + #[tracing_test::traced_test] $(#[$attr])? async fn $peer_type() { use deadline::deadline; @@ -144,6 +145,7 @@ mod validator { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn duplicate_disconnect_attempts() { // common::initialise_logger(3); diff --git a/node/tests/handshake.rs b/node/tests/handshake.rs index b3b8455f7e..f2049a5d30 100644 --- a/node/tests/handshake.rs +++ b/node/tests/handshake.rs @@ -183,6 +183,7 @@ mod validator { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn simultaneous_connection_attempt() { // common::initialise_logger(3); @@ -242,6 +243,7 @@ async fn simultaneous_connection_attempt() { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn duplicate_connection_attempts() { // common::initialise_logger(3); diff --git a/node/tests/peering.rs b/node/tests/peering.rs index e2ffd20132..5c40605bd0 100644 --- a/node/tests/peering.rs +++ b/node/tests/peering.rs @@ -30,6 +30,7 @@ macro_rules! test_reject_unsolicited_peer_response { $( paste! { #[tokio::test] + #[tracing_test::traced_test] async fn [<$node_type _rejects_unsolicited_peer_response>]() { // Spin up a full node. 
let node = $crate::common::node::$node_type().await; From f7c7816a95fd0d1c84371fca2543a795465e34e3 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 15 Sep 2025 16:32:44 -0700 Subject: [PATCH 16/16] feat: use thread-safe panic handling refactor(utils): move signal handling to dedicated snarkos-utilities crate --- .circleci/config.yml | 4 +- Cargo.lock | 16 +- Cargo.toml | 7 +- cli/Cargo.toml | 3 + cli/src/commands/developer/scan.rs | 4 +- cli/src/commands/start.rs | 95 +++++----- display/Cargo.toml | 3 + display/src/lib.rs | 29 +-- display/src/pages/logs.rs | 4 + node/Cargo.toml | 6 +- node/bft/Cargo.toml | 3 + node/bft/examples/simple_node.rs | 8 +- node/bft/ledger-service/Cargo.toml | 3 + node/bft/ledger-service/src/ledger.rs | 22 +-- node/bft/ledger-service/src/translucent.rs | 18 +- node/bft/src/sync.rs | 7 +- node/bft/tests/common/primary.rs | 5 +- node/bft/tests/common/utils.rs | 7 +- node/cdn/Cargo.toml | 3 + node/cdn/src/blocks.rs | 45 +++-- node/src/client/mod.rs | 40 ++--- node/src/node.rs | 38 ++-- node/src/prover/mod.rs | 29 ++- node/src/traits.rs | 89 ++-------- node/src/validator/mod.rs | 25 +-- node/tests/common/node.rs | 9 +- snarkos/main.rs | 60 +++---- utilities/Cargo.toml | 24 +++ utilities/LICENSE.md | 194 +++++++++++++++++++++ utilities/src/lib.rs | 17 ++ utilities/src/signals.rs | 132 ++++++++++++++ 31 files changed, 663 insertions(+), 286 deletions(-) create mode 100644 utilities/Cargo.toml create mode 100644 utilities/LICENSE.md create mode 100644 utilities/src/lib.rs create mode 100644 utilities/src/signals.rs diff --git a/.circleci/config.yml b/.circleci/config.yml index 589d822b52..099ca13c50 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -127,11 +127,11 @@ commands: - run: name: "Build Tests" no_output_timeout: 30m - command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> --norun + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << 
parameters.flags >> --no-run - run: name: "Run Tests" no_output_timeout: 30m - command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 RUST_BACKTRACE=1 RUST_LOG=snarkos=trace cargo test << parameters.flags >> --norun + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 RUST_BACKTRACE=1 RUST_LOG=snarkos=trace cargo test << parameters.flags >> - clear_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache diff --git a/Cargo.lock b/Cargo.lock index 17be62dc2b..b97ffc964a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3767,6 +3767,7 @@ dependencies = [ "snarkos-node-cdn", "snarkos-node-metrics", "snarkos-node-rest", + "snarkos-utilities", "snarkvm", "sys-info", "tempfile", @@ -3787,6 +3788,7 @@ dependencies = [ "crossterm 0.29.0", "ratatui", "snarkos-node", + "snarkos-utilities", "snarkvm", "tokio", ] @@ -3806,7 +3808,6 @@ dependencies = [ "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "num_cpus", - "once_cell", "parking_lot", "paste", "pea2pea", @@ -3821,6 +3822,7 @@ dependencies = [ "snarkos-node-router", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "time", "tokio", @@ -3868,6 +3870,7 @@ dependencies = [ "snarkos-node-metrics", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "test-strategy 0.4.3", "time", @@ -3909,6 +3912,7 @@ dependencies = [ "rand 0.8.5", "rayon", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tracing", @@ -3943,6 +3947,7 @@ dependencies = [ "serde", "serde_json", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tokio-test", @@ -4050,6 +4055,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "tracing-test", ] [[package]] @@ -4127,6 +4133,14 @@ dependencies = [ "tracing", ] +[[package]] +name = "snarkos-utilities" +version = "4.2.1" +dependencies = [ + "tokio", + "tracing", +] + [[package]] name = "snarkvm" version = 
"4.2.1" diff --git a/Cargo.toml b/Cargo.toml index 3e99b33c28..d46bb0f0be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,8 @@ members = [ "node/sync", "node/sync/communication-service", "node/sync/locators", - "node/tcp" + "node/tcp", + "utilities", ] [workspace.dependencies.aleo-std] @@ -243,6 +244,10 @@ version = "=4.2.1" path = "node/tcp" version = "=4.2.1" +[workspace.dependencies.snarkos-utilities] +path = "utilities" +version = "=4.2.1" + [[bin]] name = "snarkos" path = "snarkos/main.rs" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5d2d04d82d..9fdaa256a5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -118,6 +118,9 @@ workspace = true [dependencies.snarkos-node-rest] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "parameters", "circuit", "package" ] diff --git a/cli/src/commands/developer/scan.rs b/cli/src/commands/developer/scan.rs index 3284e82baf..41e5add81a 100644 --- a/cli/src/commands/developer/scan.rs +++ b/cli/src/commands/developer/scan.rs @@ -17,6 +17,8 @@ use super::DEFAULT_ENDPOINT; use crate::helpers::{args::prepare_endpoint, dev::get_development_key}; use snarkos_node_cdn::CDN_BASE_URL; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::network::Network, prelude::{Ciphertext, Field, FromBytes, Plaintext, PrivateKey, Record, ViewKey, block::Block}, @@ -285,7 +287,7 @@ impl Scan { let rt = tokio::runtime::Runtime::new()?; // Create a placeholder shutdown flag. - let _shutdown = Default::default(); + let _shutdown = SimpleStoppable::new(); // Copy endpoint for background task. 
let endpoint = endpoint.clone(); diff --git a/cli/src/commands/start.rs b/cli/src/commands/start.rs index 5825ac821c..42e96cb2cd 100644 --- a/cli/src/commands/start.rs +++ b/cli/src/commands/start.rs @@ -23,6 +23,8 @@ use snarkos_node::{ rest::DEFAULT_REST_PORT, router::{DEFAULT_NODE_PORT, messages::NodeType}, }; +use snarkos_utilities::SignalHandler; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -54,7 +56,10 @@ use std::{ path::PathBuf, sync::{Arc, atomic::AtomicBool}, }; -use tokio::runtime::{self, Runtime}; +use tokio::{ + runtime::{self, Runtime}, + sync::mpsc, +}; use ureq::http; /// The recommended minimum number of 'open files' limit for a validator. @@ -246,7 +251,7 @@ pub struct Start { } impl Start { - /// Starts the snarkOS node. + /// Starts the snarkOS node and blocks until it terminates. pub fn parse(self) -> Result { // Prepare the shutdown flag. let shutdown: Arc = Default::default(); @@ -264,45 +269,32 @@ impl Start { // Initialize the runtime. Self::runtime().block_on(async move { // Error messages. - let node_parse_error = || "Failed to parse node arguments"; - let display_start_error = || "Failed to initialize the display"; + let node_parse_error = || "Failed to start node"; + let signal_handler = SignalHandler::new(); // Clone the configurations. - let mut cli = self.clone(); - // Parse the network. - match cli.network { - MainnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - TestnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. 
- Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - CanaryV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } + let mut self_ = self.clone(); + + // Parse the node arguments, start it, and block until shutdown. + match self_.network { + MainnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + + TestnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + CanaryV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, _ => panic!("Invalid network ID specified"), }; - // Note: Do not move this. The pending await must be here otherwise - // other snarkOS commands will not exit. - std::future::pending::<()>().await; + + // Wait until the node is stopped due to a signal (e.g., Ctrl+C). + Ok(String::new()) }) } @@ -601,9 +593,9 @@ impl Start { } } - /// Returns the node type corresponding to the given configurations. + /// Starts the node and blocks until it terminates. #[rustfmt::skip] - async fn parse_node(&mut self, shutdown: Arc) -> Result> { + async fn parse_node(&mut self, log_receiver: mpsc::Receiver>, signal_handler: Arc) -> Result<()> { if !self.nobanner { // Print the welcome banner. println!("{}", crate::helpers::welcome_message()); @@ -712,21 +704,27 @@ impl Start { } }; - // TODO(kaimast): start the display earlier and show sync progress. if !self.nodisplay && !self.nocdn { println!("🪧 The terminal UI will not start until the node has finished syncing from the CDN. If this step takes too long, consider restarting with `--nodisplay`."); } // Initialize the node. 
- match node_type { - NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, shutdown.clone()).await, - NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, shutdown.clone()).await, - NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, shutdown).await + let node = match node_type { + NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, signal_handler.clone()).await, + NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, signal_handler.clone()).await, + NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, signal_handler.clone()).await + }?; + + if !self.nodisplay { + Display::start(node.clone(), log_receiver, signal_handler.clone()).with_context(|| "Failed to start the display")?; } + + node.wait_for_signals(&signal_handler).await; + Ok(()) } - /// Returns a runtime for the node. + /// Starts a rayon thread pool and tokio runtime for the node, and returns the tokio `Runtime`. fn runtime() -> Runtime { // Retrieve the number of cores. let num_cores = num_cpus::get(); @@ -737,14 +735,17 @@ impl Start { let (num_tokio_worker_threads, max_tokio_blocking_threads, num_rayon_cores_global) = (2 * num_cores, 512, num_cores); - // Initialize the parallelization parameters. + // Set up the rayon thread pool. + // A custom panic handler is not needed here, as rayon propagates the panic to the calling thread by default (except for `rayon::spawn` which we do not use). 
rayon::ThreadPoolBuilder::new() .stack_size(8 * 1024 * 1024) .num_threads(num_rayon_cores_global) .build_global() .unwrap(); - // Initialize the runtime configuration. + // Set up the tokio Runtime. + // TODO(kaimast): set up a panic handler here for each worker thread once [`tokio::runtime::Builder::unhandled_panic`](https://docs.rs/tokio/latest/tokio/runtime/struct.Builder.html#method.unhandled_panic) is stabilized. + // As of now, detached tasks may panic and the error may not be handled by the top-level `catch_unwind`. runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(8 * 1024 * 1024) diff --git a/display/Cargo.toml b/display/Cargo.toml index ad46b17108..d3156670b5 100644 --- a/display/Cargo.toml +++ b/display/Cargo.toml @@ -28,6 +28,9 @@ version = "0.29" [dependencies.snarkos-node] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true diff --git a/display/src/lib.rs b/display/src/lib.rs index 1a03058594..1dda3193b9 100644 --- a/display/src/lib.rs +++ b/display/src/lib.rs @@ -22,6 +22,8 @@ mod tabs; use tabs::Tabs; use snarkos_node::Node; +use snarkos_utilities::Stoppable; + use snarkvm::prelude::Network; use anyhow::Result; @@ -41,6 +43,8 @@ use ratatui::{ }; use std::{ io, + io::Write, + sync::Arc, thread, time::{Duration, Instant}, }; @@ -67,7 +71,7 @@ fn content_style() -> Style { impl Display { /// Initializes a new display. - pub fn start(node: Node, log_receiver: Receiver>) -> Result<()> { + pub fn start(node: Node, log_receiver: Receiver>, stoppable: Arc) -> Result<()> { // Initialize the display. enable_raw_mode()?; let mut stdout = io::stdout(); @@ -84,25 +88,34 @@ impl Display { }; // Render the display. - let res = display.render(&mut terminal); + let res = display.render(&mut terminal, stoppable); // Terminate the display. disable_raw_mode()?; execute!(terminal.backend_mut(), LeaveAlternateScreen, DisableMouseCapture)?; terminal.show_cursor()?; - // Exit. 
+ // Print any error that may have occurred. if let Err(err) = res { - println!("{err:?}") + eprintln!("{err:?}"); } + // Write any remaining log output to stdout while the node is shutting down. + let mut log_receiver = display.logs.into_log_receiver(); + tokio::spawn(async move { + let mut stdout = io::stdout(); + while let Some(log) = log_receiver.recv().await { + let _ = write!(stdout, "{}", String::from_utf8(log).unwrap_or_default()); + } + }); + Ok(()) } } impl Display { /// Renders the display. - fn render(&mut self, terminal: &mut Terminal) -> io::Result<()> { + fn render(&mut self, terminal: &mut Terminal, stoppable: Arc) -> io::Result<()> { let mut last_tick = Instant::now(); loop { terminal.draw(|f| self.draw(f))?; @@ -114,11 +127,7 @@ impl Display { if let Event::Key(key) = event::read()? { match key.code { KeyCode::Esc => { - // // TODO (howardwu): @ljedrz to implement a wrapping scope for Display within Node/Server. - // #[allow(unused_must_use)] - // { - // self.node.shut_down(); - // } + stoppable.stop(); return Ok(()); } KeyCode::Left => self.tabs.previous(), diff --git a/display/src/pages/logs.rs b/display/src/pages/logs.rs index d9f4bd289c..aba6054499 100644 --- a/display/src/pages/logs.rs +++ b/display/src/pages/logs.rs @@ -72,4 +72,8 @@ impl Logs { .block(Block::default().borders(Borders::ALL).style(header_style()).title("Logs")); f.render_widget(combined_logs, chunks[0]); } + + pub fn into_log_receiver(self) -> mpsc::Receiver> { + self.log_receiver + } } diff --git a/node/Cargo.toml b/node/Cargo.toml index 18028642a1..64830cab67 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -78,9 +78,6 @@ workspace = true [dependencies.num_cpus] workspace = true -[dependencies.once_cell] -workspace = true - [dependencies.parking_lot] workspace = true @@ -118,6 +115,9 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "utilities", 
"async" ] diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index b9e971d3f5..72d8cc3643 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -121,6 +121,9 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "utilities", "async" ] diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 1f473693e1..0f9bfad1b0 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -19,12 +19,14 @@ extern crate tracing; #[cfg(feature = "metrics")] extern crate snarkos_node_metrics as metrics; -use aleo_std::StorageMode; use snarkos_account::Account; use snarkos_node_bft::{BFT, BftCallback, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + +use aleo_std::StorageMode; use snarkvm::{ console::{account::PrivateKey, algorithms::BHP256, types::Address}, ledger::{ @@ -59,7 +61,7 @@ use std::{ net::{IpAddr, Ipv4Addr, SocketAddr}, path::PathBuf, str::FromStr, - sync::{Arc, Mutex, OnceLock, atomic::AtomicBool}, + sync::{Arc, Mutex, OnceLock}, }; use tokio::net::TcpListener; use tracing_subscriber::{ @@ -208,7 +210,7 @@ fn create_ledger( } let mut rng = TestRng::default(); let gen_ledger = genesis_ledger(*gen_key, committee.clone(), balances.clone(), node_id, &mut rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Arc::new(AtomicBool::new(false)))) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } pub type CurrentLedger = Ledger>; diff --git a/node/bft/ledger-service/Cargo.toml b/node/bft/ledger-service/Cargo.toml index 0a0c95eb79..ecfb230556 100644 --- a/node/bft/ledger-service/Cargo.toml +++ b/node/bft/ledger-service/Cargo.toml @@ -48,6 +48,9 @@ optional = true 
workspace = true optional = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.parking_lot] workspace = true optional = true diff --git a/node/bft/ledger-service/src/ledger.rs b/node/bft/ledger-service/src/ledger.rs index bdf506e32e..5d9a0aa0c0 100644 --- a/node/bft/ledger-service/src/ledger.rs +++ b/node/bft/ledger-service/src/ledger.rs @@ -14,6 +14,9 @@ // limitations under the License. use crate::{LedgerService, fmt_id, spawn_blocking}; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -46,16 +49,7 @@ use parking_lot::RwLock; #[cfg(not(feature = "serial"))] use rayon::prelude::*; -use std::{ - collections::BTreeMap, - fmt, - io::Read, - ops::Range, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; +use std::{collections::BTreeMap, fmt, io::Read, ops::Range, sync::Arc}; /// The capacity of the cache holding the highest blocks. const BLOCK_CACHE_SIZE: usize = 10; @@ -66,14 +60,14 @@ pub struct CoreLedgerService> { ledger: Ledger, block_cache: Arc>>>, latest_leader: Arc)>>>, - shutdown: Arc, + stoppable: Arc, } impl> CoreLedgerService { /// Initializes a new core ledger service. - pub fn new(ledger: Ledger, shutdown: Arc) -> Self { + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { let block_cache = Arc::new(RwLock::new(BTreeMap::new())); - Self { ledger, block_cache, latest_leader: Default::default(), shutdown } + Self { ledger, block_cache, latest_leader: Default::default(), stoppable } } } @@ -371,7 +365,7 @@ impl> LedgerService for CoreLedgerService< #[cfg(feature = "ledger-write")] fn advance_to_next_block(&self, block: &Block) -> Result<()> { // If the Ctrl-C handler registered the signal, then skip advancing to the next block. - if self.shutdown.load(Ordering::Acquire) { + if self.stoppable.is_stopped() { bail!("Skipping advancing to block {} - The node is shutting down", block.height()); } // Advance to the next block. 
diff --git a/node/bft/ledger-service/src/translucent.rs b/node/bft/ledger-service/src/translucent.rs index c6f48a41ec..4c4fba4525 100644 --- a/node/bft/ledger-service/src/translucent.rs +++ b/node/bft/ledger-service/src/translucent.rs @@ -14,8 +14,9 @@ // limitations under the License. use crate::{CoreLedgerService, LedgerService}; -use async_trait::async_trait; -use indexmap::IndexMap; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -27,11 +28,10 @@ use snarkvm::{ }, prelude::{Address, ConsensusVersion, Field, Network, Result, narwhal::BatchCertificate}, }; -use std::{ - fmt, - ops::Range, - sync::{Arc, atomic::AtomicBool}, -}; + +use async_trait::async_trait; +use indexmap::IndexMap; +use std::{fmt, ops::Range, sync::Arc}; pub struct TranslucentLedgerService> { inner: CoreLedgerService, @@ -46,8 +46,8 @@ impl> fmt::Debug for TranslucentLedgerService impl> TranslucentLedgerService { /// Initializes a new ledger service wrapper. - pub fn new(ledger: Ledger, shutdown: Arc) -> Self { - Self { inner: CoreLedgerService::new(ledger, shutdown) } + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { + Self { inner: CoreLedgerService::new(ledger, stoppable) } } } diff --git a/node/bft/src/sync.rs b/node/bft/src/sync.rs index 70c342b131..9c30c4367e 100644 --- a/node/bft/src/sync.rs +++ b/node/bft/src/sync.rs @@ -891,6 +891,7 @@ mod tests { use snarkos_account::Account; use snarkos_node_sync::BlockSync; + use snarkos_utilities::SimpleStoppable; use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -942,7 +943,7 @@ mod tests { // Initialize the ledger with the genesis block. let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. 
- let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample 5 rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { @@ -1115,7 +1116,7 @@ mod tests { // Initialize the syncing ledger. let syncing_ledger = Arc::new(CoreLedgerService::new( CurrentLedger::load(genesis, StorageMode::new_test(None)).unwrap(), - Default::default(), + SimpleStoppable::new(), )); // Initialize the gateway. let gateway = Gateway::new(account.clone(), storage.clone(), syncing_ledger.clone(), None, &[], None)?; @@ -1167,7 +1168,7 @@ mod tests { // Initialize the ledger with the genesis block. let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. - let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { // Initialize the committee. 
diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index d69785b72c..aa59023d81 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -18,10 +18,13 @@ use crate::common::{ TranslucentLedgerService, utils::{fire_unconfirmed_solutions, fire_unconfirmed_transactions, initialize_logger}, }; + use snarkos_account::Account; use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -150,7 +153,7 @@ impl TestNetwork { for (id, account) in accounts.into_iter().enumerate() { let gen_ledger = genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), &mut rng); - let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())); + let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())); let storage = Storage::new( ledger.clone(), Arc::new(BFTMemoryService::new()), diff --git a/node/bft/tests/common/utils.rs b/node/bft/tests/common/utils.rs index c7e1af3db6..27767dc8a5 100644 --- a/node/bft/tests/common/utils.rs +++ b/node/bft/tests/common/utils.rs @@ -14,10 +14,11 @@ // limitations under the License. 
use crate::common::{CurrentNetwork, TranslucentLedgerService, primary}; + use snarkos_account::Account; -use snarkos_node_bft::{Gateway, Primary, Worker, helpers::Storage}; +use snarkos_node_bft::{Gateway, Primary, Worker, helpers::Storage, storage_service::BFTMemoryService}; +use snarkos_utilities::SimpleStoppable; -use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ console::account::Address, ledger::{ @@ -180,7 +181,7 @@ pub fn sample_ledger( let gen_ledger = primary::genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } /// Samples a new storage with the given ledger. diff --git a/node/cdn/Cargo.toml b/node/cdn/Cargo.toml index 050cb382c7..09dab6e5ae 100644 --- a/node/cdn/Cargo.toml +++ b/node/cdn/Cargo.toml @@ -45,6 +45,9 @@ workspace = true optional = true features = [ "metrics" ] +[dependencies.snarkos-utilities] +workspace = true + [dependencies.rayon] workspace = true optional = true diff --git a/node/cdn/src/blocks.rs b/node/cdn/src/blocks.rs index 4b489d195e..ab9a13db10 100644 --- a/node/cdn/src/blocks.rs +++ b/node/cdn/src/blocks.rs @@ -17,6 +17,8 @@ // https://github.com/rust-lang/rust-clippy/issues/6446 #![allow(clippy::await_holding_lock)] +use snarkos_utilities::Stoppable; + use snarkvm::prelude::{ Deserialize, DeserializeOwned, @@ -87,11 +89,11 @@ impl CdnBlockSync { pub fn new>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) -> Self { let task = { let base_url = base_url.clone(); - tokio::spawn(async move { Self::worker(base_url, ledger, shutdown).await }) + tokio::spawn(async move { Self::worker(base_url, ledger, stoppable).await }) }; debug!("Started sync from CDN at {base_url}"); @@ -119,13 +121,13 @@ impl CdnBlockSync { async fn worker>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) 
-> SyncResult { // Fetch the node height. let start_height = ledger.latest_height() + 1; // Load the blocks from the CDN into the ledger. let ledger_clone = ledger.clone(); - let result = load_blocks(&base_url, start_height, None, shutdown, move |block: Block| { + let result = load_blocks(&base_url, start_height, None, stoppable, move |block: Block| { ledger_clone.advance_to_next_block(&block) }) .await; @@ -172,7 +174,7 @@ pub async fn load_blocks( base_url: &http::Uri, start_height: u32, end_height: Option, - shutdown: Arc, + stoppable: Arc, process: impl FnMut(Block) -> Result<()> + Clone + Send + Sync + 'static, ) -> Result { // Create a Client to maintain a connection pool throughout the sync. @@ -225,16 +227,19 @@ pub async fn load_blocks( // Spawn a background task responsible for concurrent downloads. let pending_blocks_clone = pending_blocks.clone(); let base_url = base_url.to_owned(); - let shutdown_clone = shutdown.clone(); - tokio::spawn(async move { - download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, shutdown_clone).await; - }); + + { + let stoppable = stoppable.clone(); + tokio::spawn(async move { + download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, stoppable).await; + }); + } // A loop for inserting the pending blocks into the ledger. let mut current_height = start_height.saturating_sub(1); while current_height < end_height - 1 { // If we are instructed to shut down, abort. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { info!("Stopping block sync at {} - shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -269,12 +274,12 @@ pub async fn load_blocks( // Attempt to advance the ledger using the CDN block bundle. 
let mut process_clone = process.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); current_height = tokio::task::spawn_blocking(move || { threadpool.install(|| { for block in next_blocks.into_iter().filter(|b| (start_height..end_height).contains(&b.height())) { // If we are instructed to shut down, abort. - if shutdown_clone.load(Ordering::Relaxed) { + if stoppable_clone.is_stopped() { info!("Stopping block sync at {} - the node is shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -314,7 +319,7 @@ async fn download_block_bundles( cdn_start: u32, cdn_end: u32, pending_blocks: Arc>>>, - shutdown: Arc, + stoppable: Arc, ) { // Keep track of the number of concurrent requests. let active_requests: Arc = Default::default(); @@ -322,7 +327,7 @@ async fn download_block_bundles( let mut start = cdn_start; while start < cdn_end - 1 { // If we are instructed to shut down, stop downloading. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { break; } @@ -356,7 +361,7 @@ async fn download_block_bundles( let base_url_clone = base_url.clone(); let pending_blocks_clone = pending_blocks.clone(); let active_requests_clone = active_requests.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); tokio::spawn(async move { // Increment the number of active requests. 
active_requests_clone.fetch_add(1, Ordering::Relaxed); @@ -392,7 +397,7 @@ async fn download_block_bundles( attempts += 1; if attempts > MAXIMUM_REQUEST_ATTEMPTS { warn!("Maximum number of requests to {blocks_url} reached - shutting down..."); - shutdown_clone.store(true, Ordering::Relaxed); + stoppable_clone.stop(); break; } tokio::time::sleep(Duration::from_secs(attempts as u64 * 10)).await; @@ -553,8 +558,10 @@ fn log_progress( #[cfg(test)] mod tests { - use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, log_progress}; - use crate::load_blocks; + use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, load_blocks, log_progress}; + + use snarkos_utilities::SimpleStoppable; + use snarkvm::prelude::{MainnetV0, block::Block}; use http::Uri; @@ -576,7 +583,7 @@ mod tests { let rt = tokio::runtime::Runtime::new().unwrap(); rt.block_on(async { let completed_height = - load_blocks(&testnet_cdn_url, start, end, Default::default(), process).await.unwrap(); + load_blocks(&testnet_cdn_url, start, end, SimpleStoppable::new(), process).await.unwrap(); assert_eq!(blocks.read().len(), expected); if expected > 0 { assert_eq!(blocks.read().last().unwrap().height(), completed_height); diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index 98601b0481..43beafed71 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -15,11 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}, + cdn::CdnBlockSync, + traits::NodeInterface, +}; use snarkos_account::Account; -use snarkos_node_bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}; -use snarkos_node_cdn::CdnBlockSync; use snarkos_node_rest::Rest; use snarkos_node_router::{ Heartbeat, @@ -34,6 +36,8 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ console::network::Network, 
ledger::{ @@ -61,7 +65,6 @@ use std::{ sync::{ Arc, atomic::{ - AtomicBool, AtomicUsize, Ordering::{Acquire, Relaxed}, }, @@ -119,10 +122,10 @@ pub struct Client> { num_verifying_executions: Arc, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, + /// The signal handling logic. + signal_handler: Arc, } impl> Client { @@ -138,16 +141,13 @@ impl> Client { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::::load(genesis.clone(), storage_mode.clone())?; // Initialize the ledger service. - let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), signal_handler.clone())); // Determine if the client should allow external peers. let allow_external_peers = true; @@ -189,13 +189,13 @@ impl> Client { num_verifying_deploys: Default::default(), num_verifying_executions: Default::default(), handles: Default::default(), - shutdown: shutdown.clone(), + signal_handler: signal_handler.clone(), }; // Perform sync with CDN (if enabled). let cdn_sync = cdn.map(|base_url| { trace!("CDN sync is enabled"); - Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown)) + Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler)) }); // Initialize the REST server. @@ -225,8 +225,7 @@ impl> Client { node.initialize_deploy_verification(); // Initialize execution verification. node.initialize_execute_verification(); - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); + // Return the node. Ok(node) } @@ -260,8 +259,8 @@ impl> Client { self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. 
- if _self.shutdown.load(std::sync::atomic::Ordering::Acquire) { - info!("Shutting down block production"); + if _self.signal_handler.is_stopped() { + info!("Shutting down sync task"); break; } @@ -367,7 +366,7 @@ impl> Client { self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down solution verification"); break; } @@ -441,7 +440,7 @@ impl> Client { self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down deployment verification"); break; } @@ -494,7 +493,7 @@ impl> Client { self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down execution verification"); break; } @@ -554,7 +553,6 @@ impl> NodeInterface for Client { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the client..."); diff --git a/node/src/node.rs b/node/src/node.rs index 591d5a0846..5a0540d6ce 100644 --- a/node/src/node.rs +++ b/node/src/node.rs @@ -13,9 +13,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::{Client, Prover, Validator, traits::NodeInterface}; +use crate::{ + Client, + Prover, + Validator, + router::{Outbound, Router, messages::NodeType}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_router::{Outbound, Router, messages::NodeType}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{ Address, Ledger, @@ -28,10 +36,7 @@ use snarkvm::prelude::{ use aleo_std::StorageMode; use anyhow::Result; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, -}; +use std::{net::SocketAddr, sync::Arc}; #[derive(Clone)] pub enum Node { @@ -59,7 +64,7 @@ impl Node { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Validator(Arc::new( Validator::new( @@ -76,7 +81,7 @@ impl Node { allow_external_peers, dev_txs, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -90,10 +95,10 @@ impl Node { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Prover(Arc::new( - Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, shutdown).await?, + Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, signal_handler).await?, ))) } @@ -109,7 +114,7 @@ impl Node { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Client(Arc::new( Client::new( @@ -123,7 +128,7 @@ impl Node { storage_mode, rotate_external_peers, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -229,4 +234,13 @@ impl Node { Self::Client(node) => node.shut_down().await, } } + + /// Waits until the node receives a signal. 
+ pub async fn wait_for_signals(&self, signal_handler: &SignalHandler) { + match self { + Self::Validator(node) => node.wait_for_signals(signal_handler).await, + Self::Prover(node) => node.wait_for_signals(signal_handler).await, + Self::Client(node) => node.wait_for_signals(signal_handler).await, + } + } } diff --git a/node/src/prover/mod.rs b/node/src/prover/mod.rs index b86abe01c8..17f93d2bac 100644 --- a/node/src/prover/mod.rs +++ b/node/src/prover/mod.rs @@ -15,9 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::ledger_service::ProverLedgerService, + sync::{BlockSync, Ping}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_bft::ledger_service::ProverLedgerService; use snarkos_node_router::{ Heartbeat, Inbound, @@ -26,11 +30,12 @@ use snarkos_node_router::{ Routing, messages::{Message, NodeType, UnconfirmedSolution}, }; -use snarkos_node_sync::{BlockSync, Ping}; use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ ledger::narwhal::Data, prelude::{ @@ -57,7 +62,7 @@ use std::{ net::SocketAddr, sync::{ Arc, - atomic::{AtomicBool, AtomicU8, Ordering}, + atomic::{AtomicU8, Ordering}, }, }; @@ -82,8 +87,8 @@ pub struct Prover> { max_puzzle_instances: u8, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, + /// The signal handling logic. + signal_handler: Arc, /// Keeps track of sending pings. ping: Arc>, /// PhantomData. @@ -99,11 +104,8 @@ impl> Prover { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger service. let ledger_service = Arc::new(ProverLedgerService::new()); // Determine if the prover should allow external peers. 
@@ -146,15 +148,13 @@ impl> Prover { max_puzzle_instances: u8::try_from(max_puzzle_instances)?, handles: Default::default(), ping, - shutdown, + signal_handler, _phantom: Default::default(), }; // Initialize the routing. node.initialize_routing().await; // Initialize the puzzle. node.initialize_puzzle().await; - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. Ok(node) } @@ -172,7 +172,6 @@ impl> NodeInterface for Prover { // Shut down the puzzle. debug!("Shutting down the puzzle..."); - self.shutdown.store(true, Ordering::Release); // Abort the tasks. debug!("Shutting down the prover..."); @@ -243,7 +242,7 @@ impl> Prover { } // If the Ctrl-C handler registered the signal, stop the prover. - if self.shutdown.load(Ordering::Acquire) { + if self.signal_handler.is_stopped() { debug!("Shutting down the puzzle..."); break; } diff --git a/node/src/traits.rs b/node/src/traits.rs index 0c031c0ec9..481d700bda 100644 --- a/node/src/traits.rs +++ b/node/src/traits.rs @@ -13,19 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use snarkos_node_router::{Routing, messages::NodeType}; +use crate::router::{Routing, messages::NodeType}; + +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{Address, Network, PrivateKey, ViewKey}; -use once_cell::sync::OnceCell; -use std::{ - future::Future, - io, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, - time::Duration, -}; +use std::time::Duration; #[async_trait] pub trait NodeInterface: Routing { @@ -56,65 +50,20 @@ pub trait NodeInterface: Routing { /// Handles OS signals for the node to intercept and perform a clean shutdown. /// The optional `shutdown_flag` flag can be used to cleanly terminate the syncing process. 
- fn handle_signals(shutdown_flag: Arc) -> Arc> { - // In order for the signal handler to be started as early as possible, a reference to the node needs - // to be passed to it at a later time. - let node: Arc> = Default::default(); - - #[cfg(target_family = "unix")] - fn signal_listener() -> impl Future> { - use tokio::signal::unix::{SignalKind, signal}; - - // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP. - let mut s_int = signal(SignalKind::interrupt()).unwrap(); - let mut s_term = signal(SignalKind::terminate()).unwrap(); - let mut s_quit = signal(SignalKind::quit()).unwrap(); - let mut s_hup = signal(SignalKind::hangup()).unwrap(); - - // Return when any of the signals above is received. - async move { - tokio::select!( - _ = s_int.recv() => (), - _ = s_term.recv() => (), - _ = s_quit.recv() => (), - _ = s_hup.recv() => (), - ); - Ok(()) - } - } - #[cfg(not(target_family = "unix"))] - fn signal_listener() -> impl Future> { - tokio::signal::ctrl_c() - } - - let node_clone = node.clone(); - tokio::task::spawn(async move { - match signal_listener().await { - Ok(()) => { - warn!("=========================================================================================="); - warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); - warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); - warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); - warn!("=========================================================================================="); - - match node_clone.get() { - // If the node is already initialized, then shut it down. - Some(node) => node.shut_down().await, - // Otherwise, if the node is not yet initialized, then set the shutdown flag directly. - None => shutdown_flag.store(true, Ordering::Relaxed), - } - - // A best-effort attempt to let any ongoing activity conclude. 
- tokio::time::sleep(Duration::from_secs(3)).await; - - // Terminate the process. - std::process::exit(0); - } - Err(error) => error!("tokio::signal::ctrl_c encountered an error: {}", error), - } - }); - - node + async fn wait_for_signals(&self, handler: &SignalHandler) { + handler.wait_for_signals().await; + + warn!("=========================================================================================="); + warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); + warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); + warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); + warn!("=========================================================================================="); + + // If the node is already initialized, then shut it down. + self.shut_down().await; + + // A best-effort attempt to let any ongoing activity conclude. + tokio::time::sleep(Duration::from_secs(3)).await; } /// Shuts down the node. diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 4f53c71ba7..155928ad6c 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -35,6 +35,8 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::SignalHandler; + use snarkvm::{ prelude::{ Ledger, @@ -53,11 +55,7 @@ use core::future::Future; use locktick::parking_lot::Mutex; #[cfg(not(feature = "locktick"))] use parking_lot::Mutex; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, - time::Duration, -}; +use std::{net::SocketAddr, sync::Arc, time::Duration}; /// A validator is a full node, capable of validating blocks. #[derive(Clone)] @@ -74,8 +72,6 @@ pub struct Validator> { sync: Arc>, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. 
ping: Arc>, } @@ -96,16 +92,13 @@ impl> Validator { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::load(genesis, storage_mode.clone())?; // Initialize the ledger service. - let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), signal_handler.clone())); // Determine if the validator should rotate external peers. let rotate_external_peers = false; @@ -152,11 +145,10 @@ impl> Validator { sync: sync.clone(), ping, handles: Default::default(), - shutdown: shutdown.clone(), }; // Perform sync with CDN (if enabled). - let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown))); + let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler))); // Initialize the transaction pool. node.initialize_transaction_pool(dev, dev_txs)?; @@ -188,8 +180,6 @@ impl> Validator { // Initialize the routing. node.initialize_routing().await; - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. Ok(node) } @@ -465,7 +455,6 @@ impl> NodeInterface for Validator { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the validator..."); @@ -535,7 +524,7 @@ mod tests { false, dev_txs, None, - Default::default(), + SignalHandler::new(), ) .await .unwrap(); diff --git a/node/tests/common/node.rs b/node/tests/common/node.rs index dae081b62d..b83d22ca1c 100644 --- a/node/tests/common/node.rs +++ b/node/tests/common/node.rs @@ -14,8 +14,11 @@ // limitations under the License. 
use crate::common::test_peer::sample_genesis_block; + use snarkos_account::Account; use snarkos_node::{Client, Prover, Validator}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{MainnetV0 as CurrentNetwork, store::helpers::memory::ConsensusMemory}; use aleo_std::StorageMode; @@ -39,7 +42,7 @@ pub async fn client() -> Client> StorageMode::new_test(None), false, // No extra peer rotation. None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create client instance") @@ -53,7 +56,7 @@ pub async fn prover() -> Prover> sample_genesis_block(), StorageMode::new_test(None), None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create prover instance") @@ -74,7 +77,7 @@ pub async fn validator() -> Validator { + Err((msg, backtrace)) => { + print_error!("⚠️ {}\n", msg.replace("panicked at", "snarkOS encountered an unexpected error at")); + + // Always show backtraces. + let mut msg = "Backtrace:\n".to_string(); + msg.push_str(" [...]\n"); + + // Remove all the low level frames. + // This can be done more cleanly once the `backtrace_frames` feature is stabilized. + let backtrace = backtrace.to_string(); + let lines = backtrace.lines().skip_while(|line| !line.contains("core::panicking")); + + for line in lines { + // Stop printing once we hit the panic handler. + if line.contains("snarkos::main") { + break; + } + + msg.push_str(&format!("{line}\n")); + } + + // Print the entire backtrace as a single log message. + print_error!("{msg}"); + // Print some information for the end-user. 
print_error!( "This is most likely a bug!\n\ Please report it to the snarkOS developers: https://github.com/ProvableHQ/snarkOS/issues/new?template=bug.md" diff --git a/utilities/Cargo.toml b/utilities/Cargo.toml new file mode 100644 index 0000000000..7ad655cf95 --- /dev/null +++ b/utilities/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "snarkos-utilities" +version = "4.2.1" +authors = [ "The Aleo Team " ] +description = "Utilities for a decentralized operating system" +homepage = "https://aleo.org" +repository = "https://github.com/ProvableHQ/snarkOS" +keywords = [ + "aleo", + "cryptography", + "blockchain", + "decentralized", + "zero-knowledge" +] +categories = [ "cryptography", "cryptography::cryptocurrencies", "os" ] +license = "Apache-2.0" +edition = "2024" + +[dependencies.tokio] +workspace = true +features = [ "macros", "signal" ] + +[dependencies.tracing] +workspace = true diff --git a/utilities/LICENSE.md b/utilities/LICENSE.md new file mode 100644 index 0000000000..d0af96c393 --- /dev/null +++ b/utilities/LICENSE.md @@ -0,0 +1,194 @@ +Apache License +============== + +_Version 2.0, January 2004_ +_<>_ + +### Terms and Conditions for use, reproduction, and distribution + +#### 1. Definitions + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, “control” means **(i)** the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the +outstanding shares, or **(iii)** beneficial ownership of such entity. 
+ +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +“submitted” means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +#### 2. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +#### 3. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +#### 4. Redistribution + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +* **(a)** You must give any other recipients of the Work or Derivative Works a copy of +this License; and +* **(b)** You must cause any modified files to carry prominent notices stating that You +changed the files; and +* **(c)** You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +* **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +#### 5. Submission of Contributions + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +#### 6. Trademarks + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +#### 7. Disclaimer of Warranty + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +#### 8. 
Limitation of Liability + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +#### 9. Accepting Warranty or Additional Liability + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +_END OF TERMS AND CONDITIONS_ + +### APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets `[]` replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same “printed page” as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs new file mode 100644 index 0000000000..2e53d8550f --- /dev/null +++ b/utilities/src/lib.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod signals; +pub use signals::*; diff --git a/utilities/src/signals.rs b/utilities/src/signals.rs new file mode 100644 index 0000000000..c2b0e9cd9c --- /dev/null +++ b/utilities/src/signals.rs @@ -0,0 +1,132 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at:
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{
+    Arc,
+    atomic::{AtomicBool, Ordering},
+};
+use tokio::sync::Notify;
+
+use tracing::{debug, error};
+
+/// Generic trait that can be queried for whether the current process should be stopped.
+/// This is implemented by `SignalHandler` and `SimpleStoppable`.
+pub trait Stoppable: Send + Sync {
+    fn stop(&self); // Marks the implementor as stopped.
+    fn is_stopped(&self) -> bool; // Reports whether `stop` has been called.
+}
+
+/// Wrapper around `AtomicBool` that implements the `Stoppable` trait.
+///
+/// This is useful when no signal or complex shutdown handling is necessary.
+pub struct SimpleStoppable {
+    state: AtomicBool, // Starts `false`; set to `true` by `stop`.
+}
+
+impl SimpleStoppable {
+    pub fn new() -> Arc<Self> {
+        Arc::new(Self { state: AtomicBool::new(false) })
+    }
+}
+
+impl Stoppable for SimpleStoppable {
+    fn stop(&self) {
+        self.state.store(true, Ordering::SeqCst);
+    }
+
+    fn is_stopped(&self) -> bool {
+        self.state.load(Ordering::SeqCst)
+    }
+}
+
+/// `Stoppable` that stops itself once a termination signal arrives (or the listener fails).
+pub struct SignalHandler {
+    stopped: AtomicBool, // Starts `false`; set to `true` by `stop`.
+    notify: Notify, // Signalled by `stop`; awaited in `wait_for_signals`.
+}
+
+impl SignalHandler {
+    pub fn new() -> Arc<Self> {
+        let obj = Arc::new(Self { stopped: AtomicBool::new(false), notify: Default::default() });
+        // Listen for signals in the background; `tokio::spawn` requires a running Tokio runtime.
+        {
+            let obj = obj.clone();
+            tokio::spawn(async move {
+                obj.handle_signals().await;
+            });
+        }
+
+        obj
+    }
+
+    /// Background task that waits for a termination signal and then calls `stop`.
+    async fn handle_signals(&self) {
+        #[cfg(target_family = "unix")]
+        let signal_listener = async move {
+            use tokio::signal::unix::{SignalKind, signal};
+
+            // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP.
+            let mut s_int = signal(SignalKind::interrupt())?;
+            let mut s_term = signal(SignalKind::terminate())?;
+            let mut s_quit = signal(SignalKind::quit())?;
+            let mut s_hup = signal(SignalKind::hangup())?;
+
+            tokio::select!(
+                _ = s_int.recv() => debug!("Received SIGINT"),
+                _ = s_term.recv() => debug!("Received SIGTERM"),
+                _ = s_quit.recv() => debug!("Received SIGQUIT"),
+                _ = s_hup.recv() => debug!("Received SIGHUP"),
+            );
+
+            std::io::Result::<()>::Ok(())
+        };
+
+        #[cfg(not(target_family = "unix"))]
+        let signal_listener = async move {
+            tokio::signal::ctrl_c().await?; // `ctrl_c()` returns a future that must be awaited.
+            debug!("Got signal");
+            std::io::Result::<()>::Ok(()) // Explicit type, as in the unix branch, so `?` can infer the error.
+        };
+
+        // Block until the signal; a listener error is logged and the handler is stopped regardless.
+        match signal_listener.await {
+            Ok(()) => {}
+            Err(error) => {
+                error!("tokio::signal encountered an error: {error}");
+            }
+        }
+
+        self.stop();
+    }
+
+    /// Waits until this handler has been stopped (by a signal or an explicit `stop` call).
+    /// Note: This can only be called once, and must not be called concurrently.
+    pub async fn wait_for_signals(&self) {
+        while !self.is_stopped() {
+            self.notify.notified().await
+        }
+    }
+}
+
+impl Stoppable for SignalHandler {
+    fn stop(&self) {
+        self.stopped.store(true, Ordering::SeqCst);
+        self.notify.notify_one(); // Wakes a single waiter (or stores one permit if none is waiting).
+    }
+
+    fn is_stopped(&self) -> bool {
+        self.stopped.load(Ordering::SeqCst)
+    }
+}