diff --git a/.circleci/config.yml b/.circleci/config.yml index d924f44dea..099ca13c50 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -125,8 +125,13 @@ commands: - setup_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache - run: + name: "Build Tests" no_output_timeout: 30m - command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> --no-run + - run: + name: "Run Tests" + no_output_timeout: 30m + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 RUST_BACKTRACE=1 RUST_LOG=snarkos=trace cargo test << parameters.flags >> - clear_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache diff --git a/Cargo.lock b/Cargo.lock index d57e13b77e..b97ffc964a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,7 @@ dependencies = [ "cookie", "document-features", "idna", - "indexmap 2.11.1", + "indexmap 2.11.3", "log", "serde", "serde_derive", @@ -1454,7 +1454,7 @@ dependencies = [ "js-sys", "libc", "r-efi", - "wasi 0.14.6+wasi-0.2.4", + "wasi 0.14.7+wasi-0.2.4", "wasm-bindgen", ] @@ -1518,7 +1518,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.11.1", + "indexmap 2.11.3", "slab", "tokio", "tokio-util", @@ -1944,14 +1944,15 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.1" +version = "2.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921" +checksum = "92119844f513ffa41556430369ab02c295a3578af21cf945caa3e9e0c2481ac3" dependencies = [ "equivalent", "hashbrown 0.15.5", "rayon", "serde", + "serde_core", ] [[package]] @@ -2038,15 +2039,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2083,9 +2075,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.78" +version = "0.3.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e" dependencies = [ "once_cell", "wasm-bindgen", @@ -2234,6 +2226,16 @@ name = "locktick" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307f02aff720d58003290879abe635b818b2176488c5ba2855ab9c11b4e0c04e" +dependencies = [ + "backtrace", + "parking_lot", + "simple_moving_average", +] + +[[package]] +name = "locktick" +version = "0.3.0" +source = "git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard#014d3940ae8c52e860d7ed539f8c3e9452b769ff" dependencies = [ "backtrace", "parking_lot", @@ -2321,7 +2323,7 @@ dependencies = [ "base64 0.21.7", "hyper 0.14.32", "hyper-tls 0.5.0", - "indexmap 2.11.1", + "indexmap 2.11.3", "ipnet", "metrics", "metrics-util", @@ -2391,14 +2393,13 @@ dependencies = [ [[package]] name = "mockall" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43766c2b5203b10de348ffe19f7e54564b64f3d6018ff7648d1e2d6d3a0f0a48" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" dependencies = [ "cfg-if", "downcast", "fragile", - "lazy_static", "mockall_derive", "predicates", "predicates-tree", @@ -2406,9 +2407,9 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.12.1" +version = "0.13.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cbce79ec385a1d4f54baa90a76401eb15d9cab93685f62e7e9f942aa00ae2" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" dependencies = [ "cfg-if", "proc-macro2", @@ -2515,16 +2516,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "num-format" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" -dependencies = [ - "arrayvec", - "itoa", -] - [[package]] name = "num-integer" version = "0.1.46" @@ -3477,9 +3468,9 @@ checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" -version = "1.0.224" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b" +checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d" dependencies = [ "serde_core", "serde_derive", @@ -3487,18 +3478,18 @@ dependencies = [ [[package]] name = "serde_core" -version = "1.0.224" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab" +checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.224" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0" +checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -3511,7 +3502,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "indexmap 2.11.1", + 
"indexmap 2.11.3", "itoa", "memchr", "ryu", @@ -3561,7 +3552,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.11.1", + "indexmap 2.11.3", "schemars 0.9.0", "schemars 1.0.4", "serde", @@ -3720,7 +3711,7 @@ version = "4.2.1" dependencies = [ "built", "clap", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rusty-hook", "snarkos-account", "snarkos-cli", @@ -3759,8 +3750,8 @@ dependencies = [ "clap", "colored 3.0.0", "crossterm 0.29.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "nix", "num_cpus", "parking_lot", @@ -3776,6 +3767,7 @@ dependencies = [ "snarkos-node-cdn", "snarkos-node-metrics", "snarkos-node-rest", + "snarkos-utilities", "snarkvm", "sys-info", "tempfile", @@ -3796,6 +3788,7 @@ dependencies = [ "crossterm 0.29.0", "ratatui", "snarkos-node", + "snarkos-utilities", "snarkvm", "tokio", ] @@ -3811,11 +3804,10 @@ dependencies = [ "deadline", "futures-util", "http 1.3.1", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "num_cpus", - "once_cell", "parking_lot", "paste", "pea2pea", @@ -3830,12 +3822,14 @@ dependencies = [ "snarkos-node-router", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "time", "tokio", "tokio-util", "tracing", "tracing-subscriber", + "tracing-test", ] [[package]] @@ -3853,9 +3847,9 @@ dependencies = [ "colored 3.0.0", "deadline", "futures", - "indexmap 2.11.1", - "itertools 0.12.1", - "locktick", + "indexmap 2.11.3", + "itertools 0.14.0", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "mockall", "open", @@ -3876,6 +3870,7 @@ dependencies = [ "snarkos-node-metrics", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "test-strategy 0.4.3", 
"time", @@ -3894,7 +3889,7 @@ version = "4.2.1" dependencies = [ "anyhow", "bytes", - "indexmap 2.11.1", + "indexmap 2.11.3", "proptest", "serde", "snarkos-node-sync-locators", @@ -3911,12 +3906,13 @@ version = "4.2.1" dependencies = [ "anyhow", "async-trait", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rand 0.8.5", "rayon", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tracing", @@ -3928,8 +3924,8 @@ version = "4.2.1" dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "parking_lot", "snarkvm", @@ -3944,13 +3940,14 @@ dependencies = [ "bincode", "colored 3.0.0", "http 1.3.1", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rayon", "reqwest", "serde", "serde_json", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tokio-test", @@ -3963,17 +3960,16 @@ version = "4.2.1" dependencies = [ "aleo-std", "anyhow", + "async-trait", "colored 3.0.0", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "lru 0.16.1", "once_cell", "parking_lot", "snarkos-account", "snarkos-node-bft", - "snarkos-node-bft-ledger-service", - "snarkos-node-bft-storage-service", "snarkos-node-metrics", "snarkos-node-sync", "snarkvm", @@ -3986,7 +3982,7 @@ dependencies = [ name = "snarkos-node-metrics" version = "4.2.1" dependencies = [ - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "metrics-exporter-prometheus", "parking_lot", "rayon", @@ -4004,9 +4000,9 @@ dependencies = [ "base64 0.22.1", "built", "http 1.3.1", - "indexmap 
2.11.1", + "indexmap 2.11.3", "jsonwebtoken", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "once_cell", "parking_lot", "rand 0.8.5", @@ -4039,7 +4035,7 @@ dependencies = [ "futures", "futures-util", "linked-hash-map", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "peak_alloc", "rand 0.8.5", @@ -4059,6 +4055,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "tracing-test", ] [[package]] @@ -4082,9 +4079,9 @@ version = "4.2.1" dependencies = [ "anyhow", "futures", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "parking_lot", "rand 0.8.5", "serde", @@ -4113,7 +4110,7 @@ name = "snarkos-node-sync-locators" version = "4.2.1" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "serde", "snarkvm", "tracing", @@ -4126,7 +4123,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "locktick", + "locktick 0.3.0 (git+https://github.com/kaimast/locktick.git?branch=fix%2Fexport-lock-guard)", "once_cell", "parking_lot", "snarkos-node-metrics", @@ -4136,15 +4133,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "snarkos-utilities" +version = "4.2.1" +dependencies = [ + "tokio", + "tracing", +] + [[package]] name = "snarkvm" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "anstyle", "anyhow", "dotenvy", - "num-format", "rand 0.8.5", "serde_json", "snarkvm-algorithms", @@ -4164,7 +4167,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4173,7 +4176,7 @@ dependencies = [ "fxhash", "hashbrown 0.15.5", "hex", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", "num-traits", "rand 0.8.5", @@ -4192,7 +4195,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms-cuda" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "blst", "cc", @@ -4203,7 +4206,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4217,7 +4220,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-network", "snarkvm-circuit-types", @@ -4227,7 +4230,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-types", "snarkvm-console-algorithms", @@ -4237,7 +4240,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-types", @@ -4247,9 +4250,9 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", "nom", "num-traits", @@ -4265,12 +4268,12 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment-witness" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" [[package]] name = "snarkvm-circuit-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-collections", @@ -4281,7 +4284,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-program" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4295,7 +4298,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-address", @@ -4310,7 +4313,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4323,7 +4326,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-console-types-boolean", @@ -4332,7 +4335,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ 
"snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4342,7 +4345,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4354,7 +4357,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4366,7 +4369,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4377,7 +4380,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4389,7 +4392,7 @@ dependencies = [ [[package]] name = "snarkvm-console" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-account", "snarkvm-console-algorithms", @@ -4402,7 +4405,7 @@ dependencies = [ [[package]] name = "snarkvm-console-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bs58", "snarkvm-console-network", @@ -4413,7 +4416,7 @@ dependencies = [ [[package]] name = "snarkvm-console-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "blake2s_simd", "smallvec", @@ -4426,7 +4429,7 @@ dependencies = [ [[package]] name = "snarkvm-console-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "rayon", @@ -4437,11 +4440,11 @@ dependencies = [ [[package]] name = "snarkvm-console-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "enum-iterator", - "indexmap 2.11.1", + "indexmap 2.11.3", "lazy_static", "paste", "serde", @@ -4457,7 +4460,7 @@ 
dependencies = [ [[package]] name = "snarkvm-console-network-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "bech32", @@ -4475,12 +4478,12 @@ dependencies = [ [[package]] name = "snarkvm-console-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "enum-iterator", "enum_index", "enum_index_derive", - "indexmap 2.11.1", + "indexmap 2.11.3", "num-derive", "num-traits", "serde_json", @@ -4495,7 +4498,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-address", @@ -4510,7 +4513,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4521,7 +4524,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" 
+source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", ] @@ -4529,7 +4532,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4539,7 +4542,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4550,7 +4553,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4561,7 +4564,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4572,7 +4575,7 @@ dependencies = [ 
[[package]] name = "snarkvm-console-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4583,7 +4586,7 @@ dependencies = [ [[package]] name = "snarkvm-curves" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "rand 0.8.5", "rayon", @@ -4597,7 +4600,7 @@ dependencies = [ [[package]] name = "snarkvm-fields" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4614,12 +4617,12 @@ dependencies = [ [[package]] name = "snarkvm-ledger" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4641,7 +4644,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-authority" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "rand 0.8.5", @@ -4653,10 +4656,10 @@ dependencies = [ [[package]] name = "snarkvm-ledger-block" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4675,10 +4678,10 @@ dependencies = [ [[package]] name = "snarkvm-ledger-committee" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "proptest", "rand 0.8.5", "rand_chacha 0.3.1", @@ -4694,7 +4697,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-ledger-narwhal-batch-certificate", "snarkvm-ledger-narwhal-batch-header", @@ -4707,9 +4710,9 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-certificate" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", 
"snarkvm-console", @@ -4720,9 +4723,9 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-header" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4733,7 +4736,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-data" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bytes", "serde_json", @@ -4744,9 +4747,9 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-subdag" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "rayon", "serde_json", "snarkvm-console", @@ -4759,7 +4762,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bytes", "serde_json", @@ -4772,7 +4775,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission-id" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "snarkvm-console", "snarkvm-ledger-puzzle", @@ -4781,13 +4784,13 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", "bincode", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4801,13 +4804,13 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle-epoch" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", "colored 3.0.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4824,7 +4827,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-query" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "anyhow", "async-trait", @@ -4841,13 +4844,13 @@ dependencies = [ [[package]] name = "snarkvm-ledger-store" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std-storage", "anyhow", "bincode", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "rayon", "rocksdb", @@ -4868,7 +4871,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-test-helpers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4886,7 +4889,7 @@ dependencies = [ [[package]] name = "snarkvm-metrics" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "metrics", ] @@ -4894,7 +4897,7 @@ dependencies = [ [[package]] name = "snarkvm-parameters" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", @@ -4903,7 +4906,7 @@ dependencies = [ "curl", "hex", "lazy_static", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "paste", "rand 0.8.5", @@ -4917,13 +4920,13 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", - "indexmap 2.11.1", + "indexmap 2.11.3", "itertools 0.14.0", - "locktick", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4950,12 +4953,12 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-process" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "colored 3.0.0", - "indexmap 2.11.1", - "locktick", + "indexmap 2.11.3", + "locktick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot", "rand 0.8.5", "rand_chacha 0.3.1", @@ -4975,9 +4978,9 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "paste", "rand 0.8.5", "rand_chacha 0.3.1", @@ -4993,7 +4996,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-snark" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "bincode", "serde_json", @@ -5006,11 +5009,12 @@ dependencies = [ [[package]] name = "snarkvm-utilities" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "aleo-std", "anyhow", "bincode", + "colored 3.0.0", "num-bigint", "num_cpus", "rand 0.8.5", @@ -5021,6 +5025,7 @@ dependencies = [ "smol_str", "snarkvm-utilities-derives", "thiserror 2.0.16", + "tokio", "tracing", "zeroize", ] @@ -5028,7 +5033,7 @@ dependencies = [ [[package]] name = "snarkvm-utilities-derives" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=35c82646eeb2e9561be#35c82646eeb2e9561be9b8d549d1f7532dfe2b22" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#cec41dd0faeef595293e4b91894ce81ce1227fc9" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -5498,9 +5503,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" dependencies = [ "rustls", "tokio", @@ -5558,7 +5563,7 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae2a4cf385da23d1d53bc15cdfa5c2109e93d8d362393c801e87da2f72f0e201" dependencies = [ - "indexmap 2.11.1", + "indexmap 2.11.3", "serde_core", "serde_spanned", "toml_datetime", @@ -5965,9 +5970,9 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.14.6+wasi-0.2.4" +version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f71243a3f320c00a8459e455c046ce571229c2f31fd11645d9dc095e3068ca0" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" dependencies = [ "wasip2", ] @@ -5983,9 
+5988,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819" dependencies = [ "cfg-if", "once_cell", @@ -5996,9 +6001,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" +checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c" dependencies = [ "bumpalo", "log", @@ -6010,9 +6015,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.51" +version = "0.4.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca85039a9b469b38336411d6d6ced91f3fc87109a2a27b0c197663f5144dffe" +checksum = "a0b221ff421256839509adbb55998214a70d829d3a28c69b4a6672e9d2a42f67" dependencies = [ "cfg-if", "js-sys", @@ -6023,9 +6028,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0" dependencies = [ "quote 1.0.40", "wasm-bindgen-macro-support", @@ -6033,9 +6038,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +checksum = "ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32" dependencies = [ "proc-macro2", "quote 1.0.40", @@ -6046,18 +6051,18 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-shared" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.78" +version = "0.3.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e4b637749ff0d92b8fad63aa1f7cff3cbe125fd49c175cd6345e7272638b12" +checksum = "fbe734895e869dc429d78c4b433f8d17d95f8d05317440b4fad5ab2d33e596dc" dependencies = [ "js-sys", "wasm-bindgen", @@ -6548,7 +6553,7 @@ dependencies = [ "crossbeam-utils", "displaydoc", "flate2", - "indexmap 2.11.1", + "indexmap 2.11.3", "memchr", "thiserror 2.0.16", "time", diff --git a/Cargo.toml b/Cargo.toml index e922558d34..d46bb0f0be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,8 @@ members = [ "node/sync", "node/sync/communication-service", "node/sync/locators", - "node/tcp" + "node/tcp", + "utilities", ] [workspace.dependencies.aleo-std] @@ -46,7 +47,8 @@ default-features = false [workspace.dependencies.snarkvm] #path = "../snarkVM" git = "https://github.com/ProvableHQ/snarkVM.git" -rev = "35c82646eeb2e9561be" +#rev = "35c82646eeb2e9561be" +branch = "feat/track-error" #version = "=4.2.1" default-features = false #features = [ "circuit", "console", "rocks" ] @@ -94,7 +96,9 @@ version = "0.3" version = "0.3" [workspace.dependencies.locktick] -version = "0.3" +#version = "0.3" +git = "https://github.com/kaimast/locktick.git" +branch = "fix/export-lock-guard" [workspace.dependencies.lru] version = "0.16" @@ -240,6 +244,10 @@ version = "=4.2.1" path = "node/tcp" version = "=4.2.1" +[workspace.dependencies.snarkos-utilities] +path = "utilities" +version = "=4.2.1" + [[bin]] name = "snarkos" path = "snarkos/main.rs" diff --git a/build.rs b/build.rs index a807f82d0b..e5f5c9edcc 100644 --- a/build.rs +++ b/build.rs @@ 
-126,8 +126,9 @@ fn check_locktick_imports>(path: P) { } // If the file has a lock import "imbalance", print it out and increment the counter. + // Allow having more locktick, than regular, imports. assert!( - lock_balance == 0, + lock_balance <= 0, "The locks in \"{}\" don't seem to have `locktick` counterparts!", entry.path().display() ); diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5d2d04d82d..9fdaa256a5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -118,6 +118,9 @@ workspace = true [dependencies.snarkos-node-rest] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "parameters", "circuit", "package" ] diff --git a/cli/src/commands/developer/scan.rs b/cli/src/commands/developer/scan.rs index 3284e82baf..41e5add81a 100644 --- a/cli/src/commands/developer/scan.rs +++ b/cli/src/commands/developer/scan.rs @@ -17,6 +17,8 @@ use super::DEFAULT_ENDPOINT; use crate::helpers::{args::prepare_endpoint, dev::get_development_key}; use snarkos_node_cdn::CDN_BASE_URL; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::network::Network, prelude::{Ciphertext, Field, FromBytes, Plaintext, PrivateKey, Record, ViewKey, block::Block}, @@ -285,7 +287,7 @@ impl Scan { let rt = tokio::runtime::Runtime::new()?; // Create a placeholder shutdown flag. - let _shutdown = Default::default(); + let _shutdown = SimpleStoppable::new(); // Copy endpoint for background task. 
let endpoint = endpoint.clone(); diff --git a/cli/src/commands/start.rs b/cli/src/commands/start.rs index 5825ac821c..42e96cb2cd 100644 --- a/cli/src/commands/start.rs +++ b/cli/src/commands/start.rs @@ -23,6 +23,8 @@ use snarkos_node::{ rest::DEFAULT_REST_PORT, router::{DEFAULT_NODE_PORT, messages::NodeType}, }; +use snarkos_utilities::SignalHandler; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -54,7 +56,10 @@ use std::{ path::PathBuf, sync::{Arc, atomic::AtomicBool}, }; -use tokio::runtime::{self, Runtime}; +use tokio::{ + runtime::{self, Runtime}, + sync::mpsc, +}; use ureq::http; /// The recommended minimum number of 'open files' limit for a validator. @@ -246,7 +251,7 @@ pub struct Start { } impl Start { - /// Starts the snarkOS node. + /// Starts the snarkOS node and blocks until it terminates. pub fn parse(self) -> Result { // Prepare the shutdown flag. let shutdown: Arc = Default::default(); @@ -264,45 +269,32 @@ impl Start { // Initialize the runtime. Self::runtime().block_on(async move { // Error messages. - let node_parse_error = || "Failed to parse node arguments"; - let display_start_error = || "Failed to initialize the display"; + let node_parse_error = || "Failed to start node"; + let signal_handler = SignalHandler::new(); // Clone the configurations. - let mut cli = self.clone(); - // Parse the network. - match cli.network { - MainnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - TestnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. 
- Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - CanaryV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } + let mut self_ = self.clone(); + + // Parse the node arguments, start it, and block until shutdown. + match self_.network { + MainnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + + TestnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + CanaryV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, _ => panic!("Invalid network ID specified"), }; - // Note: Do not move this. The pending await must be here otherwise - // other snarkOS commands will not exit. - std::future::pending::<()>().await; + + // Wait until the node is stopped due to a signal (e.g., Ctrl+C). + Ok(String::new()) }) } @@ -601,9 +593,9 @@ impl Start { } } - /// Returns the node type corresponding to the given configurations. + /// Start the node and blocks until it terminates. #[rustfmt::skip] - async fn parse_node(&mut self, shutdown: Arc) -> Result> { + async fn parse_node(&mut self, log_receiver: mpsc::Receiver>, signal_handler: Arc) -> Result<()> { if !self.nobanner { // Print the welcome banner. println!("{}", crate::helpers::welcome_message()); @@ -712,21 +704,27 @@ impl Start { } }; - // TODO(kaimast): start the display earlier and show sync progress. if !self.nodisplay && !self.nocdn { println!("🪧 The terminal UI will not start until the node has finished syncing from the CDN. If this step takes too long, consider restarting with `--nodisplay`."); } // Initialize the node. 
- match node_type { - NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, shutdown.clone()).await, - NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, shutdown.clone()).await, - NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, shutdown).await + let node = match node_type { + NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, signal_handler.clone()).await, + NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, signal_handler.clone()).await, + NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, signal_handler.clone()).await + }?; + + if !self.nodisplay { + Display::start(node.clone(), log_receiver, signal_handler.clone()).with_context(|| "Failed to start the display")?; } + + node.wait_for_signals(&signal_handler).await; + Ok(()) } - /// Returns a runtime for the node. + /// Starts a rayon thread pool and tokio runtime for the node, and returns the tokio `Runtime`. fn runtime() -> Runtime { // Retrieve the number of cores. let num_cores = num_cpus::get(); @@ -737,14 +735,17 @@ impl Start { let (num_tokio_worker_threads, max_tokio_blocking_threads, num_rayon_cores_global) = (2 * num_cores, 512, num_cores); - // Initialize the parallelization parameters. + // Set up the rayon thread pool. + // A custom panic handler is not needed here, as rayon propagates the panic to the calling thread by default (except for `rayon::spawn` which we do not use). 
rayon::ThreadPoolBuilder::new() .stack_size(8 * 1024 * 1024) .num_threads(num_rayon_cores_global) .build_global() .unwrap(); - // Initialize the runtime configuration. + // Set up the tokio Runtime. + // TODO(kaimast): set up a panic handler here for each worker thread once [`tokio::runtime::Builder::unhandled_panic`](https://docs.rs/tokio/latest/tokio/runtime/struct.Builder.html#method.unhandled_panic) is stabilized. + // As of now, detached tasks may panic and the error may not be handled by the top-level `catch_unwind`. runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(8 * 1024 * 1024) diff --git a/display/Cargo.toml b/display/Cargo.toml index ad46b17108..d3156670b5 100644 --- a/display/Cargo.toml +++ b/display/Cargo.toml @@ -28,6 +28,9 @@ version = "0.29" [dependencies.snarkos-node] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true diff --git a/display/src/lib.rs b/display/src/lib.rs index 1a03058594..1dda3193b9 100644 --- a/display/src/lib.rs +++ b/display/src/lib.rs @@ -22,6 +22,8 @@ mod tabs; use tabs::Tabs; use snarkos_node::Node; +use snarkos_utilities::Stoppable; + use snarkvm::prelude::Network; use anyhow::Result; @@ -41,6 +43,8 @@ use ratatui::{ }; use std::{ io, + io::Write, + sync::Arc, thread, time::{Duration, Instant}, }; @@ -67,7 +71,7 @@ fn content_style() -> Style { impl Display { /// Initializes a new display. - pub fn start(node: Node, log_receiver: Receiver>) -> Result<()> { + pub fn start(node: Node, log_receiver: Receiver>, stoppable: Arc) -> Result<()> { // Initialize the display. enable_raw_mode()?; let mut stdout = io::stdout(); @@ -84,25 +88,34 @@ impl Display { }; // Render the display. - let res = display.render(&mut terminal); + let res = display.render(&mut terminal, stoppable); // Terminate the display. disable_raw_mode()?; execute!(terminal.backend_mut(), LeaveAlternateScreen, DisableMouseCapture)?; terminal.show_cursor()?; - // Exit. 
+ // Print any error that may have occurred. if let Err(err) = res { - println!("{err:?}") + eprintln!("{err:?}"); } + // Write any remaining log output to stdout while the node is shutting down. + let mut log_receiver = display.logs.into_log_receiver(); + tokio::spawn(async move { + let mut stdout = io::stdout(); + while let Some(log) = log_receiver.recv().await { + let _ = write!(stdout, "{}", String::from_utf8(log).unwrap_or_default()); + } + }); + Ok(()) } } impl Display { /// Renders the display. - fn render(&mut self, terminal: &mut Terminal) -> io::Result<()> { + fn render(&mut self, terminal: &mut Terminal, stoppable: Arc) -> io::Result<()> { let mut last_tick = Instant::now(); loop { terminal.draw(|f| self.draw(f))?; @@ -114,11 +127,7 @@ impl Display { if let Event::Key(key) = event::read()? { match key.code { KeyCode::Esc => { - // // TODO (howardwu): @ljedrz to implement a wrapping scope for Display within Node/Server. - // #[allow(unused_must_use)] - // { - // self.node.shut_down(); - // } + stoppable.stop(); return Ok(()); } KeyCode::Left => self.tabs.previous(), diff --git a/display/src/pages/logs.rs b/display/src/pages/logs.rs index d9f4bd289c..aba6054499 100644 --- a/display/src/pages/logs.rs +++ b/display/src/pages/logs.rs @@ -72,4 +72,8 @@ impl Logs { .block(Block::default().borders(Borders::ALL).style(header_style()).title("Logs")); f.render_widget(combined_logs, chunks[0]); } + + pub fn into_log_receiver(self) -> mpsc::Receiver> { + self.log_receiver + } } diff --git a/node/Cargo.toml b/node/Cargo.toml index 330b2a8a8b..64830cab67 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -78,9 +78,6 @@ workspace = true [dependencies.num_cpus] workspace = true -[dependencies.once_cell] -workspace = true - [dependencies.parking_lot] workspace = true @@ -118,8 +115,12 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true +features = [ 
"utilities", "async" ] [dependencies.time] workspace = true @@ -154,6 +155,9 @@ features = [ "test" ] workspace = true features = [ "env-filter", "fmt" ] +[dev-dependencies.tracing-test] +workspace = true + [dev-dependencies.rand_chacha] workspace = true diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 61ab97f8b5..72d8cc3643 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -38,11 +38,15 @@ cuda = [ "snarkos-node-bft-ledger-service/cuda", "snarkos-node-sync/cuda" ] +persistent-storage = [ "snarkos-node-bft-storage-service/persistent" ] test = [ # "snarkvm/test" this breaks some of the tests - "snarkvm/test-helpers", "snarkos-node-bft-ledger-service/test", - "snarkos-node-bft-storage-service/test" + "snarkos-node-bft-storage-service/test", + "test-helpers" +] +test-helpers = [ + "snarkvm/test-helpers", ] serial = [ "snarkos-node-bft-ledger-service/serial" ] @@ -117,9 +121,12 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true -features = [ "utilities" ] +features = [ "utilities", "async" ] [dependencies.time] workspace = true @@ -153,7 +160,7 @@ features = [ "derive" ] version = "0.2" [dev-dependencies.itertools] -version = "0.12" +version = "0.14" [dev-dependencies.open] version = "5" @@ -199,4 +206,9 @@ features = [ "env-filter" ] workspace = true [dev-dependencies.mockall] -version = "0.12.1" +version = "0.13" + +[[test]] +name = "gateway-e2e" +path = "./tests/gateway_e2e.rs" +required-features = [ "test-helpers" ] diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 6001617668..0f9bfad1b0 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -19,17 +19,14 @@ extern crate tracing; #[cfg(feature = "metrics")] extern crate snarkos_node_metrics as metrics; -use aleo_std::StorageMode; use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - MEMORY_POOL_PORT, - 
Primary, - helpers::{ConsensusReceiver, PrimarySender, Storage, init_consensus_channels, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, BftCallback, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + +use aleo_std::StorageMode; use snarkvm::{ console::{account::PrivateKey, algorithms::BHP256, types::Address}, ledger::{ @@ -37,7 +34,7 @@ use snarkvm::{ Ledger, block::Transaction, committee::{Committee, MIN_VALIDATOR_STAKE}, - narwhal::{BatchHeader, Data}, + narwhal::{BatchHeader, Data, Subdag, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, store::{ConsensusStore, helpers::memory::ConsensusMemory}, }, @@ -64,9 +61,9 @@ use std::{ net::{IpAddr, Ipv4Addr, SocketAddr}, path::PathBuf, str::FromStr, - sync::{Arc, Mutex, OnceLock, atomic::AtomicBool}, + sync::{Arc, Mutex, OnceLock}, }; -use tokio::{net::TcpListener, sync::oneshot}; +use tokio::net::TcpListener; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -116,13 +113,7 @@ pub fn initialize_logger(verbosity: u8) { /**************************************************************************************************/ /// Starts the BFT instance. -pub async fn start_bft( - node_id: u16, - num_nodes: u16, - peers: HashMap, -) -> Result<(BFT, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +pub async fn start_bft(node_id: u16, num_nodes: u16, peers: HashMap) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -142,21 +133,20 @@ pub async fn start_bft( // Initialize the trusted validators. let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the consensus channels. 
- let (consensus_sender, consensus_receiver) = init_consensus_channels::(); - // Initialize the consensus receiver handler. - consensus_handler(consensus_receiver); + let consensus_handler = Arc::new(ConsensusHandler {}); // Initialize the BFT instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut bft = - BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?; + let bft = + BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None) + .await?; // Run the BFT instance. - bft.run(None, Some(consensus_sender), sender.clone(), receiver).await?; + bft.run(None, Some(consensus_handler)).await?; // Retrieve the BFT's primary. let primary = bft.primary(); // Handle OS signals. handle_signals(primary); // Return the BFT instance. - Ok((bft, sender)) + Ok(bft) } /// Starts the primary instance. @@ -164,9 +154,7 @@ pub async fn start_primary( node_id: u16, num_nodes: u16, peers: HashMap, -) -> Result<(Primary, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -187,7 +175,7 @@ pub async fn start_primary( let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the primary instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut primary = Primary::::new( + let primary = Primary::::new( account, storage, ledger, @@ -196,13 +184,14 @@ pub async fn start_primary( &trusted_validators, storage_mode, None, - )?; + ) + .await?; // Run the primary instance. - primary.run(None, None, sender.clone(), receiver).await?; + primary.run(None, None, None).await?; // Handle OS signals. handle_signals(&primary); // Return the primary instance. - Ok((primary, sender)) + Ok(primary) } /// Initialize the translucent ledger service. 
@@ -221,7 +210,7 @@ fn create_ledger( } let mut rng = TestRng::default(); let gen_ledger = genesis_ledger(*gen_key, committee.clone(), balances.clone(), node_id, &mut rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Arc::new(AtomicBool::new(false)))) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } pub type CurrentLedger = Ledger>; @@ -308,25 +297,28 @@ fn initialize_components(node_id: u16, num_nodes: u16) -> Result<(Committee) { - let ConsensusReceiver { mut rx_consensus_subdag } = receiver; +struct ConsensusHandler {} + +#[async_trait::async_trait] +impl BftCallback for ConsensusHandler { + async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()> { + // Determine the amount of time to sleep for the subdag. + let subdag_ms = subdag.values().flatten().count(); + // Determine the amount of time to sleep for the transmissions. + let transmissions_ms = transmissions.len() * 25; + // Add a constant delay. + let constant_ms = 100; + // Compute the total amount of time to sleep. + let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; + // Sleep for the determined amount of time. + tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - tokio::task::spawn(async move { - while let Some((subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - // Determine the amount of time to sleep for the subdag. - let subdag_ms = subdag.values().flatten().count(); - // Determine the amount of time to sleep for the transmissions. - let transmissions_ms = transmissions.len() * 25; - // Add a constant delay. - let constant_ms = 100; - // Compute the total amount of time to sleep. - let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; - // Sleep for the determined amount of time. - tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - // Call the callback. 
- callback.send(Ok(())).ok(); - } - }); + Ok(()) + } } /// Returns the trusted validators. @@ -367,8 +359,7 @@ fn handle_signals(primary: &Primary) { /**************************************************************************************************/ /// Fires *fake* unconfirmed solutions at the node. -fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -392,13 +383,8 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -408,8 +394,7 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u } /// Fires *fake* unconfirmed transactions at the node. -fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); +fn fire_unconfirmed_transactions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. 
let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -434,13 +419,8 @@ fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. - if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -579,14 +559,14 @@ async fn main() -> Result<()> { let mut bft_holder = None; // Start the node. - let (primary, sender) = match args.mode { + let primary = match args.mode { Mode::Bft => { // Start the BFT. - let (bft, sender) = start_bft(args.id, args.num_nodes, peers).await?; + let bft = start_bft(args.id, args.num_nodes, peers).await?; // Set the BFT holder. bft_holder = Some(bft.clone()); // Return the primary and sender. - (bft.primary().clone(), sender) + bft.primary().clone() } Mode::Narwhal => start_primary(args.id, args.num_nodes, peers).await?, }; @@ -598,7 +578,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_solutions) { // Note: We allow the user to overload the solutions rate, even when the 'fire-transmissions' flag is enabled. 
(Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_solutions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_solutions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; @@ -607,7 +587,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_transactions) { // Note: We allow the user to overload the transactions rate, even when the 'fire-transmissions' flag is enabled. (Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_transactions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_transactions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; diff --git a/node/bft/ledger-service/Cargo.toml b/node/bft/ledger-service/Cargo.toml index 0a0c95eb79..ecfb230556 100644 --- a/node/bft/ledger-service/Cargo.toml +++ b/node/bft/ledger-service/Cargo.toml @@ -48,6 +48,9 @@ optional = true workspace = true optional = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.parking_lot] workspace = true optional = true diff --git a/node/bft/ledger-service/src/ledger.rs b/node/bft/ledger-service/src/ledger.rs index bdf506e32e..5d9a0aa0c0 100644 --- a/node/bft/ledger-service/src/ledger.rs +++ b/node/bft/ledger-service/src/ledger.rs @@ -14,6 +14,9 @@ // limitations under the License. use crate::{LedgerService, fmt_id, spawn_blocking}; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -46,16 +49,7 @@ use parking_lot::RwLock; #[cfg(not(feature = "serial"))] use rayon::prelude::*; -use std::{ - collections::BTreeMap, - fmt, - io::Read, - ops::Range, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; +use std::{collections::BTreeMap, fmt, io::Read, ops::Range, sync::Arc}; /// The capacity of the cache holding the highest blocks. 
const BLOCK_CACHE_SIZE: usize = 10; @@ -66,14 +60,14 @@ pub struct CoreLedgerService> { ledger: Ledger, block_cache: Arc>>>, latest_leader: Arc)>>>, - shutdown: Arc, + stoppable: Arc, } impl> CoreLedgerService { /// Initializes a new core ledger service. - pub fn new(ledger: Ledger, shutdown: Arc) -> Self { + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { let block_cache = Arc::new(RwLock::new(BTreeMap::new())); - Self { ledger, block_cache, latest_leader: Default::default(), shutdown } + Self { ledger, block_cache, latest_leader: Default::default(), stoppable } } } @@ -371,7 +365,7 @@ impl> LedgerService for CoreLedgerService< #[cfg(feature = "ledger-write")] fn advance_to_next_block(&self, block: &Block) -> Result<()> { // If the Ctrl-C handler registered the signal, then skip advancing to the next block. - if self.shutdown.load(Ordering::Acquire) { + if self.stoppable.is_stopped() { bail!("Skipping advancing to block {} - The node is shutting down", block.height()); } // Advance to the next block. 
diff --git a/node/bft/ledger-service/src/traits.rs b/node/bft/ledger-service/src/traits.rs index 02b1f2d7e5..8c09857739 100644 --- a/node/bft/ledger-service/src/traits.rs +++ b/node/bft/ledger-service/src/traits.rs @@ -17,13 +17,17 @@ use snarkvm::{ ledger::{ block::{Block, Transaction}, committee::Committee, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, + narwhal::{BatchCertificate, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, prelude::{Address, ConsensusVersion, Field, Network, Result}, }; +#[cfg(feature = "ledger-write")] use indexmap::IndexMap; +#[cfg(feature = "ledger-write")] +use snarkvm::ledger::narwhal::Subdag; + use std::{fmt::Debug, ops::Range}; #[async_trait] diff --git a/node/bft/ledger-service/src/translucent.rs b/node/bft/ledger-service/src/translucent.rs index c6f48a41ec..4c4fba4525 100644 --- a/node/bft/ledger-service/src/translucent.rs +++ b/node/bft/ledger-service/src/translucent.rs @@ -14,8 +14,9 @@ // limitations under the License. use crate::{CoreLedgerService, LedgerService}; -use async_trait::async_trait; -use indexmap::IndexMap; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -27,11 +28,10 @@ use snarkvm::{ }, prelude::{Address, ConsensusVersion, Field, Network, Result, narwhal::BatchCertificate}, }; -use std::{ - fmt, - ops::Range, - sync::{Arc, atomic::AtomicBool}, -}; + +use async_trait::async_trait; +use indexmap::IndexMap; +use std::{fmt, ops::Range, sync::Arc}; pub struct TranslucentLedgerService> { inner: CoreLedgerService, @@ -46,8 +46,8 @@ impl> fmt::Debug for TranslucentLedgerService impl> TranslucentLedgerService { /// Initializes a new ledger service wrapper. 
- pub fn new(ledger: Ledger, shutdown: Arc) -> Self { - Self { inner: CoreLedgerService::new(ledger, shutdown) } + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { + Self { inner: CoreLedgerService::new(ledger, stoppable) } } } diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 2d878d7bfb..ce3d15109a 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,18 +15,9 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - Primary, - helpers::{ - BFTReceiver, - ConsensusSender, - DAG, - PrimaryReceiver, - PrimarySender, - Storage, - fmt_id, - init_bft_channels, - now, - }, + helpers::{CallbackHandle, DAG, Storage, fmt_id, now}, + primary::{Primary, PrimaryCallback}, + sync::SyncCallback, }; use snarkos_account::Account; use snarkos_node_bft_ledger_service::LedgerService; @@ -40,21 +31,19 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::{Field, Network, Result, bail, ensure}, + utilities::LoggableError, }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::{Mutex, RwLock}, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::RwLock, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] -use parking_lot::{Mutex, RwLock}; +use parking_lot::RwLock; use std::{ collections::{BTreeMap, HashSet}, - future::Future, net::SocketAddr, sync::{ Arc, @@ -63,10 +52,16 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{ - sync::{OnceCell, oneshot}, - task::JoinHandle, -}; + +#[async_trait::async_trait] +pub trait BftCallback: Send + std::marker::Sync { + /// Attempts to build a new block from the given subDAG, and (tries to) advance the ledger to it.
+ async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()>; +} #[derive(Clone)] pub struct BFT { @@ -78,10 +73,8 @@ pub struct BFT { leader_certificate: Arc>>>, /// The timer for the leader certificate to be received. leader_certificate_timer: Arc, - /// The consensus sender. - consensus_sender: Arc>>, - /// Handles for all spawned tasks. - handles: Arc>>>, + /// The BFT callback (used by `Consensus`). + bft_callback: Arc>>>, /// The BFT lock. lock: Arc>, } @@ -89,7 +82,7 @@ pub struct BFT { impl BFT { /// Initializes a new instance of the BFT. #[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -100,12 +93,12 @@ impl BFT { dev: Option, ) -> Result { Ok(Self { - primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev)?, + primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev) + .await?, dag: Default::default(), leader_certificate: Default::default(), leader_certificate_timer: Default::default(), - consensus_sender: Default::default(), - handles: Default::default(), + bft_callback: Default::default(), lock: Default::default(), }) } @@ -114,24 +107,19 @@ impl BFT { /// /// This will return as soon as all required tasks are spawned. /// The function must not be called more than once per instance. - pub async fn run( - &mut self, - ping: Option>>, - consensus_sender: Option>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { + pub async fn run(&self, ping: Option>>, bft_callback: Option>>) -> Result<()> { info!("Starting the BFT instance..."); - // Initialize the BFT channels. - let (bft_sender, bft_receiver) = init_bft_channels::(); - // First, start the BFT handlers. - self.start_handlers(bft_receiver); + // Set up callbacks to pass to the primary. 
+ let primary_callback = Some(Arc::new(self.clone()) as Arc>); + let sync_callback = Some(Arc::new(self.clone()) as Arc>); + // Next, run the primary instance. - self.primary.run(ping, Some(bft_sender), primary_sender, primary_receiver).await?; - // Lastly, set the consensus sender. - // Note: This ensures during initial syncing, that the BFT does not advance the ledger. - if let Some(consensus_sender) = consensus_sender { - self.consensus_sender.set(consensus_sender).expect("Consensus sender already set"); + self.primary.run(ping, primary_callback, sync_callback).await?; + + // Lastly, set up callbacks for BFT itself. + // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. + if let Some(callback) = bft_callback { + self.bft_callback.set(callback)?; } Ok(()) } @@ -211,8 +199,9 @@ impl BFT { } } -impl BFT { - /// Stores the certificate in the DAG, and attempts to commit one or more anchors. +#[async_trait::async_trait] +impl PrimaryCallback for BFT { + /// Notification that a new round has started. fn update_to_next_round(&self, current_round: u64) -> bool { // Ensure the current round is at least the storage round (this is a sanity check). let storage_round = self.storage().current_round(); @@ -269,8 +258,8 @@ impl BFT { // If the BFT is ready, then update to the next round. if is_ready { // Update to the next round in storage. - if let Err(e) = self.storage().increment_to_next_round(current_round) { - warn!("BFT failed to increment to the next round from round {current_round} - {e}"); + if let Err(err) = self.storage().increment_to_next_round(current_round) { + err.log_warning(format!("BFT failed to increment to the next round from round {current_round}")); return false; } // Update the timer for the leader certificate. @@ -280,6 +269,41 @@ impl BFT { is_ready } + /// Notification about a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. 
+ self.update_dag::(certificate).await + } +} + +#[async_trait::async_trait] +impl SyncCallback for BFT { + /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** + /// already exist in the ledger. + /// + /// This method commits all the certificates into the DAG. + /// Note that there is no need to insert the certificates into the DAG, because these certificates + /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()> { + // Acquire the BFT write lock. + let mut dag = self.dag.write(); + + // Commit all the certificates. + for certificate in certificates { + dag.commit(&certificate, self.storage().max_gc_rounds()); + } + + Ok(()) + } + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. + self.update_dag::(certificate).await + } +} + +impl BFT { /// Updates the leader certificate to the current even round, /// returning `true` if the BFT is ready to update to the next round. /// @@ -312,8 +336,10 @@ impl BFT { // Retrieve the committee lookback of the current round. let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the even round {current_round}" + )); return false; } }; @@ -324,8 +350,8 @@ impl BFT { // Compute the leader for the current round. 
let computed_leader = match committee_lookback.get_leader(current_round) { Ok(leader) => leader, - Err(e) => { - error!("BFT failed to compute the leader for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!("BFT failed to compute the leader for the even round {current_round}")); return false; } }; @@ -403,8 +429,10 @@ impl BFT { // Retrieve the committee lookback for the current round. let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the odd round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the odd round {current_round}" + )); return false; } }; @@ -498,7 +526,7 @@ impl BFT { // Retrieve the committee lookback for the commit round. let Ok(committee_lookback) = self.ledger().get_committee_lookback_for_round(commit_round) else { - bail!("BFT failed to retrieve the committee with lag for commit round {commit_round}"); + bail!("BFT failed to retrieve the committee lookback for commit round {commit_round}"); }; // Either retrieve the cached leader or compute it. @@ -573,23 +601,19 @@ impl BFT { for round in (self.dag.read().last_committed_round() + 2..=leader_round.saturating_sub(2)).rev().step_by(2) { // Retrieve the previous committee for the leader round. - let previous_committee_lookback = match self.ledger().get_committee_lookback_for_round(round) { - Ok(committee) => committee, - Err(e) => { - bail!("BFT failed to retrieve a previous committee lookback for the even round {round} - {e}"); - } - }; + let previous_committee_lookback = + self.ledger().get_committee_lookback_for_round(round).with_context(|| { + format!("BFT failed to retrieve a previous committee lookback for the even round {round}") + })?; + // Either retrieve the cached leader or compute it. 
let leader = match self.ledger().latest_leader() { Some((cached_round, cached_leader)) if cached_round == round => cached_leader, _ => { // Compute the leader for the commit round. - let computed_leader = match previous_committee_lookback.get_leader(round) { - Ok(leader) => leader, - Err(e) => { - bail!("BFT failed to compute the leader for the even round {round} - {e}"); - } - }; + let computed_leader = previous_committee_lookback + .get_leader(round) + .with_context(|| format!("BFT failed to compute the leader for the even round {round}"))?; // Cache the computed leader. self.ledger().update_latest_leader(round, computed_leader); @@ -701,23 +725,12 @@ impl BFT { "BFT failed to commit - the subdag anchor round {anchor_round} does not match the leader round {leader_round}", ); - // Trigger consensus. - if let Some(consensus_sender) = self.consensus_sender.get() { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); + // Trigger the callback (if any). + if let Some(cb) = self.bft_callback.get() { // Send the subdag and transmissions to consensus. - consensus_sender.tx_consensus_subdag.send((subdag, transmissions, callback_sender)).await?; - // Await the callback to continue. - match callback_receiver.await { - Ok(Ok(())) => (), // continue - Ok(Err(e)) => { - error!("BFT failed to advance the subdag for round {anchor_round} - {e}"); - return Ok(()); - } - Err(e) => { - error!("BFT failed to receive the callback for round {anchor_round} - {e}"); - return Ok(()); - } + if let Err(err) = cb.process_bft_subdag(subdag, transmissions).await { + err.log_error(format!("BFT failed to advance the subdag for round {anchor_round}")); + return Ok(()); } } @@ -855,92 +868,21 @@ impl BFT { } impl BFT { - /// Starts the BFT handlers. 
- fn start_handlers(&self, bft_receiver: BFTReceiver) { - let BFTReceiver { - mut rx_primary_round, - mut rx_primary_certificate, - mut rx_sync_bft_dag_at_bootup, - mut rx_sync_bft, - } = bft_receiver; - - // Process the current round from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((current_round, callback)) = rx_primary_round.recv().await { - callback.send(self_.update_to_next_round(current_round)).ok(); - } - }); - - // Process the certificate from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_primary_certificate.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - - // Process the request to sync the BFT DAG at bootup. - let self_ = self.clone(); - self.spawn(async move { - while let Some(certificates) = rx_sync_bft_dag_at_bootup.recv().await { - self_.sync_bft_dag_at_bootup(certificates).await; - } - }); - - // Handler for new certificates that were fetched by the sync module. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_sync_bft.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - } - - /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** - /// already exist in the ledger. - /// - /// This method commits all the certificates into the DAG. - /// Note that there is no need to insert the certificates into the DAG, because these certificates - /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. 
- async fn sync_bft_dag_at_bootup(&self, certificates: Vec>) { - // Acquire the BFT write lock. - let mut dag = self.dag.write(); - - // Commit all the certificates. - for certificate in certificates { - dag.commit(&certificate, self.storage().max_gc_rounds()); - } - } - - /// Spawns a task with the given future; it should only be used for long-running tasks. - fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); - } - /// Shuts down the BFT. pub async fn shut_down(&self) { info!("Shutting down the BFT..."); + // Remove the callback. + self.bft_callback.clear(); // Acquire the lock. let _lock = self.lock.lock().await; // Shut down the primary. self.primary.shut_down().await; - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); } } #[cfg(test)] mod tests { - use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage}; + use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage, sync::SyncCallback}; use snarkos_account::Account; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -957,7 +899,10 @@ mod tests { use aleo_std::StorageMode; use anyhow::Result; use indexmap::{IndexMap, IndexSet}; - use std::sync::Arc; + use std::{ + net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + sync::Arc, + }; type CurrentNetwork = snarkvm::console::network::MainnetV0; @@ -985,29 +930,34 @@ mod tests { } // Helper function to set up BFT for testing. - fn initialize_bft( + async fn initialize_bft( account: Account, storage: Storage, ledger: Arc>, ) -> anyhow::Result> { // Create the block synchronization logic. let block_sync = Arc::new(BlockSync::new(ledger.clone())); + + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the BFT. 
BFT::new( account.clone(), storage.clone(), ledger.clone(), block_sync, - None, + Some(any_addr), &[], StorageMode::new_test(None), None, ) + .await } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_odd() -> Result<()> { + async fn test_is_leader_quorum_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. @@ -1036,7 +986,7 @@ mod tests { // Initialize the account. let account = Account::new(rng)?; // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call succeeds on an odd round. let result = bft.is_leader_quorum_or_nonleaders_available(1); @@ -1059,9 +1009,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { + async fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1071,7 +1021,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Store is at round 1, and we are checking for round 2. @@ -1081,9 +1031,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even() -> Result<()> { + async fn test_is_leader_quorum_even() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1093,7 +1043,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. 
- let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an even round. @@ -1102,9 +1052,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_even_round_ready() -> Result<()> { + async fn test_is_even_round_ready() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. @@ -1134,7 +1084,7 @@ mod tests { let account = Account::new(rng)?; // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Set the leader certificate. @@ -1148,7 +1098,7 @@ mod tests { assert!(result); // Initialize a new BFT. - let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // If the leader certificate is not set and the timer has not expired, we are not ready for the next round. let result = bft_timer.is_even_round_ready_for_next_round(certificates.clone(), committee.clone(), 2); if !bft_timer.is_timer_expired() { @@ -1169,9 +1119,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_odd() -> Result<()> { + async fn test_update_leader_certificate_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1179,7 +1129,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an odd round. 
@@ -1188,9 +1138,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_bad_round() -> Result<()> { + async fn test_update_leader_certificate_bad_round() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1198,7 +1148,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Ensure this call succeeds on an even round. let result = bft.update_leader_certificate_to_even_round(6); @@ -1206,9 +1156,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_even() -> Result<()> { + async fn test_update_leader_certificate_even() -> Result<()> { let rng = &mut TestRng::default(); // Set the current round. @@ -1250,7 +1200,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Set the leader certificate. *bft.leader_certificate.write() = Some(leader_certificate); @@ -1288,7 +1238,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(3); @@ -1318,7 +1268,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. 
- let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(2); @@ -1350,9 +1300,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { + async fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1376,7 +1326,7 @@ mod tests { /* Test missing previous certificate. */ // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // The expected error message. let error_msg = format!( @@ -1437,7 +1387,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1503,7 +1453,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1521,10 +1471,10 @@ mod tests { // Initialize a new instance of storage. let storage_2 = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT. 
- let bootup_bft = initialize_bft(account.clone(), storage_2, ledger)?; + let bootup_bft = initialize_bft(account.clone(), storage_2, ledger).await?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(certificates.clone()).await.unwrap(); // Check that the BFT starts from the same last committed round. assert_eq!(bft.dag.read().last_committed_round(), bootup_bft.dag.read().last_committed_round()); @@ -1675,7 +1625,7 @@ mod tests { // Initialize the BFT without bootup. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); @@ -1700,10 +1650,10 @@ mod tests { let bootup_storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT with bootup. - let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone()).await?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates to the storage and BFT with bootup. for certificate in post_shutdown_certificates.iter() { @@ -1878,12 +1828,12 @@ mod tests { } // Initialize the bootup BFT. let account = Account::new(rng)?; - let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. 
*bootup_bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates into the storage. let mut post_shutdown_certificates: Vec> = diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 98f1ed8dc4..536631de88 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -13,6 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod resolver; +use resolver::Resolver; + +mod cache; +use cache::Cache; + #[cfg(feature = "telemetry")] use crate::helpers::Telemetry; use crate::{ @@ -20,9 +26,8 @@ use crate::{ MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Worker, - events::{EventCodec, PrimaryPing}, - helpers::{Cache, PrimarySender, Resolver, Storage, SyncSender, WorkerSender, assign_to_worker}, - spawn_blocking, + events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, + helpers::{CallbackHandle, Storage, WorkerSender, assign_to_worker}, }; use snarkos_account::Account; use snarkos_node_bft_events::{ @@ -42,7 +47,7 @@ use snarkos_node_bft_events::{ ValidatorsResponse, }; use snarkos_node_bft_ledger_service::LedgerService; -use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService}; +use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService, locators::BlockLocators}; use snarkos_node_tcp::{ Config, Connection, @@ -56,12 +61,18 @@ use snarkos_node_tcp::{ use snarkvm::{ console::prelude::*, ledger::{ + Block, committee::Committee, - narwhal::{BatchHeader, Data}, + narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, + utilities::{ + LoggableError, + task::{self, JoinHandle}, + }, }; +use anyhow::Context; use colored::Colorize; use 
futures::SinkExt; use indexmap::{IndexMap, IndexSet}; @@ -83,7 +94,6 @@ use std::{ use tokio::{ net::TcpStream, sync::{OnceCell, oneshot}, - task::{self, JoinHandle}, }; use tokio_stream::StreamExt; use tokio_util::codec::Framed; @@ -117,6 +127,35 @@ pub trait Transport: Send + Sync { fn broadcast(&self, event: Event); } +/// Callback for events specific to BlockSync. +pub trait GatewaySyncCallback: Send + Sync { + /// We received a block response and can (possibly) advance synchronization. + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()>; + + /// We received new peer locators during a Ping. + fn update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()>; + + /// A peer disconnected. + fn remove_peer(&self, peer_ip: SocketAddr); + + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest); + + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse); +} + +/// Callback for primary-specific events +#[async_trait::async_trait] +pub trait GatewayPrimaryCallback: Send + Sync { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>); + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose); + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature); + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>); +} + /// The gateway maintains connections to other validators. /// For connections with clients and provers, the Router logic is used. #[derive(Clone)] @@ -145,12 +184,12 @@ pub struct Gateway { /// The validator telemetry. #[cfg(feature = "telemetry")] validator_telemetry: Telemetry, - /// The primary sender. - primary_sender: Arc>>, /// The worker senders. worker_senders: Arc>>>, - /// The sync sender. 
- sync_sender: Arc>>, + /// The callback for sync messages. + sync_callback: Arc>>>, + /// The callback for bft/primary messages. + primary_callback: Arc>>>, /// The spawned handles. handles: Arc>>>, /// The development mode. @@ -173,8 +212,14 @@ impl Gateway { (None, None) => SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, MEMORY_POOL_PORT)), (Some(ip), _) => ip, }; + + // Allow at most as many connections as the maximum committee size, + // and fail if the chosen port is not available. + let mut tcp_config = Config::new(ip, Committee::::max_committee_size()?); + tcp_config.allow_random_port = false; + // Initialize the TCP stack. - let tcp = Tcp::new(Config::new(ip, Committee::::max_committee_size()?)); + let tcp = Tcp::new(tcp_config); // Return the gateway. Ok(Self { @@ -189,9 +234,9 @@ impl Gateway { connecting_peers: Default::default(), #[cfg(feature = "telemetry")] validator_telemetry: Default::default(), - primary_sender: Default::default(), + primary_callback: Default::default(), worker_senders: Default::default(), - sync_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), dev, }) @@ -200,21 +245,18 @@ impl Gateway { /// Run the gateway. pub async fn run( &self, - primary_sender: PrimarySender, worker_senders: IndexMap>, - sync_sender: Option>, - ) { + primary_callback: Arc>, + sync_callback: Option>>, + ) -> Result<()> { debug!("Starting the gateway for the memory pool..."); - // Set the primary sender. - self.primary_sender.set(primary_sender).expect("Primary sender already set in gateway"); + self.worker_senders.set(worker_senders).with_context(|| "The worker senders are already set")?; - // Set the worker senders. - self.worker_senders.set(worker_senders).expect("The worker senders are already set"); + self.primary_callback.set(primary_callback)?; - // If the sync sender was provided, set the sync sender.
- if let Some(sync_sender) = sync_sender { - self.sync_sender.set(sync_sender).expect("Sync sender already set in gateway"); + if let Some(sync_callback) = sync_callback { + self.sync_callback.set(sync_callback)?; } // Enable the TCP protocols. @@ -225,13 +267,15 @@ impl Gateway { self.enable_on_connect().await; // Enable the TCP listener. Note: This must be called after the above protocols. - let listen_addr = self.tcp.enable_listener().await.expect("Failed to enable the TCP listener"); + let listen_addr = self.tcp.enable_listener().await.with_context(|| "Failed to enable the TCP listener")?; debug!("Listening for validator connections at address {listen_addr:?}"); // Initialize the heartbeat. self.initialize_heartbeat(); info!("Started the gateway for the memory pool at '{}'", self.local_ip()); + + Ok(()) } } @@ -292,7 +336,7 @@ impl CommunicationService for Gateway { let tcp = self.tcp().clone(); tcp.banned_peers().update_ip_ban(peer_ip.ip()); - tokio::spawn(async move { + task::spawn(async move { tcp.disconnect(peer_ip).await; }); } @@ -336,11 +380,6 @@ impl Gateway { &self.validator_telemetry } - /// Returns the primary sender. - pub fn primary_sender(&self) -> &PrimarySender { - self.primary_sender.get().expect("Primary sender not set in gateway") - } - /// Returns the number of workers. pub fn num_workers(&self) -> u8 { u8::try_from(self.worker_senders.get().expect("Missing worker senders in gateway").len()) @@ -451,7 +490,7 @@ impl Gateway { } let self_ = self.clone(); - Some(tokio::spawn(async move { + Some(task::spawn(async move { debug!("Connecting to validator {peer_ip}..."); // Attempt to connect to the peer. if let Err(error) = self_.tcp.connect(peer_ip).await { @@ -549,13 +588,8 @@ impl Gateway { /// Removes the connected peer and adds them to the candidate peers. fn remove_connected_peer(&self, peer_ip: SocketAddr) { // Remove the peer from the sync module. Except for some tests, there is always a sync sender. 
- if let Some(sync_sender) = self.sync_sender.get() { - let tx_block_sync_remove_peer_ = sync_sender.tx_block_sync_remove_peer.clone(); - tokio::spawn(async move { - if let Err(e) = tx_block_sync_remove_peer_.send(peer_ip).await { - warn!("Unable to remove '{peer_ip}' from the sync module - {e}"); - } - }); + if let Some(cb) = &*self.sync_callback.get_ref() { + cb.remove_peer(peer_ip); } // Removes the bidirectional map between the listener address and (ambiguous) peer address. self.resolver.remove_peer(peer_ip); @@ -643,17 +677,32 @@ impl Gateway { match event { Event::BatchPropose(batch_propose) => { // Send the batch propose to the primary. - let _ = self.primary_sender().tx_batch_propose.send((peer_ip, batch_propose)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_propose(peer_ip, batch_propose) + .await; Ok(()) } Event::BatchSignature(batch_signature) => { // Send the batch signature to the primary. - let _ = self.primary_sender().tx_batch_signature.send((peer_ip, batch_signature)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_signature(peer_ip, batch_signature) + .await; Ok(()) } Event::BatchCertified(batch_certified) => { // Send the batch certificate to the primary. - let _ = self.primary_sender().tx_batch_certified.send((peer_ip, batch_certified.certificate)).await; + let _ = self + .primary_callback + .get() + .expect("No callback set") + .process_batch_certified(peer_ip, batch_certified.certificate) + .await; Ok(()) } Event::BlockRequest(block_request) => { @@ -669,22 +718,17 @@ impl Gateway { } let self_ = self.clone(); - let blocks = match task::spawn_blocking(move || { + let blocks = task::spawn_blocking(move || { // Retrieve the blocks within the requested range. 
match self_.ledger.get_blocks(start_height..end_height) { Ok(blocks) => Ok(Data::Object(DataBlocks(blocks))), Err(error) => bail!("Missing blocks {start_height} to {end_height} from ledger - {error}"), } }) - .await - { - Ok(Ok(blocks)) => blocks, - Ok(Err(error)) => return Err(error), - Err(error) => return Err(anyhow!("[BlockRequest] {error}")), - }; + .await?; let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Send the `BlockResponse` message to the peer. let event = Event::BlockResponse(BlockResponse { request: block_request, blocks }); Transport::send(&self_, peer_ip, event).await; @@ -693,7 +737,7 @@ impl Gateway { } Event::BlockResponse(block_response) => { // Process the block response. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = self.sync_callback.get() { // Retrieve the block response. let BlockResponse { request, blocks } = block_response; @@ -719,8 +763,8 @@ impl Gateway { // Ensure the block response is well-formed. blocks.ensure_response_is_well_formed(peer_ip, request.start_height, request.end_height)?; // Send the blocks to the sync module. - if let Err(e) = sync_sender.advance_with_sync_blocks(peer_ip, blocks.0).await { - warn!("Unable to process block response from '{peer_ip}' - {e}"); + if let Err(err) = cb.insert_block_response(peer_ip, blocks.0) { + err.log_warning(format!("Unable to process block response from '{peer_ip}'")); } } Ok(()) @@ -728,18 +772,18 @@ impl Gateway { Event::CertificateRequest(certificate_request) => { // Send the certificate request to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate request to the sync module. 
- let _ = sync_sender.tx_certificate_request.send((peer_ip, certificate_request)).await; + cb.send_certificate_response(peer_ip, certificate_request); } Ok(()) } Event::CertificateResponse(certificate_response) => { // Send the certificate response to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate response to the sync module. - let _ = sync_sender.tx_certificate_response.send((peer_ip, certificate_response)).await; + cb.finish_certificate_request(peer_ip, certificate_response); } Ok(()) } @@ -759,15 +803,19 @@ impl Gateway { } // Update the peer locators. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Check the block locators are valid, and update the validators in the sync module. - if let Err(error) = sync_sender.update_peer_locators(peer_ip, block_locators).await { + if let Err(error) = cb.update_peer_locators(peer_ip, block_locators) { bail!("Validator '{peer_ip}' sent invalid block locators - {error}"); } } // Send the batch certificates to the primary. - let _ = self.primary_sender().tx_primary_ping.send((peer_ip, primary_certificate)).await; + self.primary_callback + .get() + .expect("No callback set") + .process_incoming_ping(peer_ip, primary_certificate) + .await; Ok(()) } Event::TransmissionRequest(request) => { @@ -811,7 +859,7 @@ impl Gateway { connected_peers.shuffle(&mut rand::thread_rng()); let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Initialize the validators. let mut validators = IndexMap::with_capacity(MAX_VALIDATORS_TO_SEND); // Iterate over the validators. @@ -843,7 +891,7 @@ impl Gateway { if self.number_of_connected_peers() < MIN_CONNECTED_VALIDATORS { // Attempt to connect to any validators that are not already connected. 
let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { for (validator_ip, validator_address) in validators { if self_.dev.is_some() { // Ensure the validator IP is not this node. @@ -909,7 +957,7 @@ impl Gateway { /// Disconnects from the given peer IP, if the peer is connected. pub fn disconnect(&self, peer_ip: SocketAddr) -> JoinHandle<()> { let gateway = self.clone(); - tokio::spawn(async move { + task::spawn(async move { if let Some(peer_addr) = gateway.resolver.get_ambiguous(peer_ip) { // Disconnect from this peer. let _disconnected = gateway.tcp.disconnect(peer_addr).await; @@ -935,18 +983,20 @@ impl Gateway { } /// Spawns a task with the given future; it should only be used for long-running tasks. - #[allow(dead_code)] fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the gateway. pub async fn shut_down(&self) { info!("Shutting down the gateway..."); // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Close the listener. self.tcp.shut_down().await; + // Remove the sync and primary callback (so they can be dropped). + self.sync_callback.clear(); + self.primary_callback.clear(); } } @@ -1041,7 +1091,7 @@ impl Gateway { /// This function attempts to disconnect any validators that are not in the current committee. fn handle_unauthorized_validators(&self) { let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Retrieve the connected validators. let validators = self_.connected_peers().read().clone(); // Iterate over the validator IPs. @@ -1071,7 +1121,7 @@ impl Gateway { // Select a random validator IP. 
if let Some(validator_ip) = validators.into_iter().choose(&mut rand::thread_rng()) { let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Increment the number of outbound validators requests for this validator. self_.cache.increment_outbound_validators_requests(validator_ip); // Send a `ValidatorsRequest` to the validator. @@ -1088,7 +1138,7 @@ impl Gateway { if let Some(peer_ip) = self.resolver.get_listener(peer_addr) { warn!("{CONTEXT} Disconnecting from '{peer_ip}' - {error}"); let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { Transport::send(&self_, peer_ip, DisconnectReason::ProtocolViolation.into()).await; // Disconnect from this peer. self_.disconnect(peer_ip); @@ -1160,7 +1210,7 @@ impl Transport for Gateway { if self.number_of_connected_peers() > 0 { let self_ = self.clone(); let connected_peers = self.connected_peers.read().clone(); - tokio::spawn(async move { + task::spawn(async move { // Iterate through all connected peers. for peer_ip in connected_peers { // Send the event to the peer. @@ -1195,7 +1245,7 @@ impl Reading for Gateway { let self_ = self.clone(); // Handle BlockRequest and BlockResponse messages in a separate task to not block the // inbound queue. - tokio::spawn(async move { + task::spawn(async move { self_.process_message_inner(peer_addr, message).await; }); } else { @@ -1542,7 +1592,7 @@ impl Gateway { return Some(DisconnectReason::InvalidChallengeResponse); } // Perform the deferred non-blocking deserialization of the signature. 
- let Ok(signature) = spawn_blocking!(signature.deserialize_blocking()) else { + let Ok(signature) = task::spawn_blocking(|| signature.deserialize_blocking()).await else { warn!("{CONTEXT} Gateway handshake with '{peer_addr}' failed (cannot deserialize the signature)"); return Some(DisconnectReason::InvalidChallengeResponse); }; @@ -1555,15 +1605,55 @@ impl Gateway { } } +#[cfg(any(test, feature = "test"))] +pub mod test_helpers { + use super::*; + + type CurrentNetwork = MainnetV0; + + #[derive(Default)] + pub struct DummyGatewayPrimaryCallback {} + + #[async_trait::async_trait] + impl GatewayPrimaryCallback for DummyGatewayPrimaryCallback { + async fn process_incoming_ping( + &self, + _peer_ip: SocketAddr, + _primary_certificate: Data>, + ) { + } + + async fn process_batch_propose(&self, _peer_ip: SocketAddr, _batch_propose: BatchPropose) {} + + async fn process_batch_signature( + &self, + _peer_ip: SocketAddr, + _batch_signature: BatchSignature, + ) { + } + + async fn process_batch_certified( + &self, + _peer_ip: SocketAddr, + _batch_certificate: Data>, + ) { + } + } +} + #[cfg(test)] mod prop_tests { - use crate::{ + use super::{ Gateway, + prop_tests::GatewayAddress::{Dev, Prod}, + test_helpers::DummyGatewayPrimaryCallback, + }; + + use crate::{ MAX_WORKERS, MEMORY_POOL_PORT, Worker, - gateway::prop_tests::GatewayAddress::{Dev, Prod}, - helpers::{Storage, init_primary_channels, init_worker_channels}, + helpers::{Storage, init_worker_channels}, }; use snarkos_account::Account; use snarkos_node_bft_ledger_service::MockLedgerService; @@ -1611,17 +1701,11 @@ mod prop_tests { impl GatewayAddress { fn ip(&self) -> Option { - if let GatewayAddress::Prod(ip) = self { - return *ip; - } - None + if let GatewayAddress::Prod(ip) = self { *ip } else { None } } fn port(&self) -> Option { - if let GatewayAddress::Dev(port) = self { - return Some(*port as u16); - } - None + if let GatewayAddress::Dev(port) = self { Some(*port as u16) } else { None } } } @@ -1678,8 +1762,8 @@ 
mod prop_tests { .boxed() } - #[proptest] - fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1693,10 +1777,13 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } - #[proptest] - fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1715,6 +1802,9 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } #[proptest(async = "tokio")] @@ -1730,8 +1820,6 @@ mod prop_tests { let gateway = Gateway::new(account, storage.clone(), storage.ledger().clone(), dev.ip(), &[], dev.port()).unwrap(); - let (primary_sender, _) = init_primary_channels(); - let (workers, worker_senders) = { // Construct a map of the worker senders. 
let mut tx_workers = IndexMap::new(); @@ -1756,12 +1844,15 @@ mod prop_tests { (workers, tx_workers) }; - gateway.run(primary_sender, worker_senders, None).await; + gateway.run(worker_senders, Arc::new(DummyGatewayPrimaryCallback::default()), None).await.unwrap(); assert_eq!( gateway.local_ip(), SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) ); assert_eq!(gateway.num_workers(), workers.len() as u8); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } #[proptest] diff --git a/node/bft/src/helpers/cache.rs b/node/bft/src/gateway/cache.rs similarity index 99% rename from node/bft/src/helpers/cache.rs rename to node/bft/src/gateway/cache.rs index 377bdc724e..fca474bba0 100644 --- a/node/bft/src/helpers/cache.rs +++ b/node/bft/src/gateway/cache.rs @@ -54,13 +54,7 @@ pub struct Cache { impl Default for Cache { /// Initializes a new instance of the cache. fn default() -> Self { - Self::new() - } -} - -impl Cache { - /// Initializes a new instance of the cache. - pub fn new() -> Self { + // This needs to be manually implemented as `Network` does not implement `Default`. Self { seen_inbound_connections: Default::default(), seen_inbound_events: Default::default(), diff --git a/node/bft/src/helpers/resolver.rs b/node/bft/src/gateway/resolver.rs similarity index 100% rename from node/bft/src/helpers/resolver.rs rename to node/bft/src/gateway/resolver.rs diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1370ee33de..1f23e38264 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -13,196 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::events::{ - BatchPropose, - BatchSignature, - CertificateRequest, - CertificateResponse, - TransmissionRequest, - TransmissionResponse, -}; -use snarkos_node_sync::locators::BlockLocators; -use snarkvm::{ - console::network::*, - ledger::{ - block::{Block, Transaction}, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, - puzzle::{Solution, SolutionID}, - }, - prelude::Result, -}; +use crate::events::{TransmissionRequest, TransmissionResponse}; +use snarkvm::{console::network::*, ledger::narwhal::TransmissionID}; -use indexmap::IndexMap; use std::net::SocketAddr; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::mpsc; const MAX_CHANNEL_SIZE: usize = 8192; -#[derive(Debug)] -pub struct ConsensusSender { - pub tx_consensus_subdag: - mpsc::Sender<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -#[derive(Debug)] -pub struct ConsensusReceiver { - pub rx_consensus_subdag: - mpsc::Receiver<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -/// Initializes the consensus channels. -pub fn init_consensus_channels() -> (ConsensusSender, ConsensusReceiver) { - let (tx_consensus_subdag, rx_consensus_subdag) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = ConsensusSender { tx_consensus_subdag }; - let receiver = ConsensusReceiver { rx_consensus_subdag }; - - (sender, receiver) -} - -/// "Interface" that enables, for example, sending data from storage to the the BFT logic. -#[derive(Clone, Debug)] -pub struct BFTSender { - pub tx_primary_round: mpsc::Sender<(u64, oneshot::Sender)>, - pub tx_primary_certificate: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, - pub tx_sync_bft_dag_at_bootup: mpsc::Sender>>, - pub tx_sync_bft: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, -} - -impl BFTSender { - /// Sends the current round to the BFT. - pub async fn send_primary_round_to_bft(&self, current_round: u64) -> Result { - // Initialize a callback sender and receiver. 
- let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the current round to the BFT. - self.tx_primary_round.send((current_round, callback_sender)).await?; - // Await the callback to continue. - Ok(callback_receiver.await?) - } - - /// Sends the batch certificate to the BFT. - pub async fn send_primary_certificate_to_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT. - self.tx_primary_certificate.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the batch certificates to the BFT for syncing. - pub async fn send_sync_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT for syncing. - self.tx_sync_bft.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -/// Receiving counterpart to `BFTSender` -#[derive(Debug)] -pub struct BFTReceiver { - pub rx_primary_round: mpsc::Receiver<(u64, oneshot::Sender)>, - pub rx_primary_certificate: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, - pub rx_sync_bft_dag_at_bootup: mpsc::Receiver>>, - pub rx_sync_bft: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, -} - -/// Initializes the BFT channels, and returns the sending and receiving ends. 
-pub fn init_bft_channels() -> (BFTSender, BFTReceiver) { - let (tx_primary_round, rx_primary_round) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_certificate, rx_primary_certificate) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft_dag_at_bootup, rx_sync_bft_dag_at_bootup) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft, rx_sync_bft) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = BFTSender { tx_primary_round, tx_primary_certificate, tx_sync_bft_dag_at_bootup, tx_sync_bft }; - let receiver = BFTReceiver { rx_primary_round, rx_primary_certificate, rx_sync_bft_dag_at_bootup, rx_sync_bft }; - - (sender, receiver) -} - -#[derive(Clone, Debug)] -pub struct PrimarySender { - pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, - pub tx_batch_signature: mpsc::Sender<(SocketAddr, BatchSignature)>, - pub tx_batch_certified: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_primary_ping: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_unconfirmed_solution: mpsc::Sender<(SolutionID, Data>, oneshot::Sender>)>, - pub tx_unconfirmed_transaction: mpsc::Sender<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -impl PrimarySender { - /// Sends the unconfirmed solution to the primary. - pub async fn send_unconfirmed_solution( - &self, - solution_id: SolutionID, - solution: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed solution to the primary. - self.tx_unconfirmed_solution.send((solution_id, solution, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the unconfirmed transaction to the primary. - pub async fn send_unconfirmed_transaction( - &self, - transaction_id: N::TransactionID, - transaction: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. 
- let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed transaction to the primary. - self.tx_unconfirmed_transaction.send((transaction_id, transaction, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -#[derive(Debug)] -pub struct PrimaryReceiver { - pub rx_batch_propose: mpsc::Receiver<(SocketAddr, BatchPropose)>, - pub rx_batch_signature: mpsc::Receiver<(SocketAddr, BatchSignature)>, - pub rx_batch_certified: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_primary_ping: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_unconfirmed_solution: mpsc::Receiver<(SolutionID, Data>, oneshot::Sender>)>, - pub rx_unconfirmed_transaction: - mpsc::Receiver<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -/// Initializes the primary channels. -pub fn init_primary_channels() -> (PrimarySender, PrimaryReceiver) { - let (tx_batch_propose, rx_batch_propose) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_signature, rx_batch_signature) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_certified, rx_batch_certified) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_ping, rx_primary_ping) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_solution, rx_unconfirmed_solution) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_transaction, rx_unconfirmed_transaction) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = PrimarySender { - tx_batch_propose, - tx_batch_signature, - tx_batch_certified, - tx_primary_ping, - tx_unconfirmed_solution, - tx_unconfirmed_transaction, - }; - let receiver = PrimaryReceiver { - rx_batch_propose, - rx_batch_signature, - rx_batch_certified, - rx_primary_ping, - rx_unconfirmed_solution, - rx_unconfirmed_transaction, - }; - - (sender, receiver) -} - #[derive(Debug)] pub struct WorkerSender { pub tx_worker_ping: mpsc::Sender<(SocketAddr, TransmissionID)>, @@ -228,77 +46,3 @@ pub fn init_worker_channels() -> (WorkerSender, WorkerReceiver (sender, receiver) } 
- -#[derive(Debug)] -pub struct SyncSender { - pub tx_block_sync_advance_with_sync_blocks: mpsc::Sender<(SocketAddr, Vec>, oneshot::Sender>)>, - pub tx_block_sync_remove_peer: mpsc::Sender, - pub tx_block_sync_update_peer_locators: mpsc::Sender<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub tx_certificate_request: mpsc::Sender<(SocketAddr, CertificateRequest)>, - pub tx_certificate_response: mpsc::Sender<(SocketAddr, CertificateResponse)>, -} - -impl SyncSender { - /// Sends the request to update the peer locators. - pub async fn update_peer_locators(&self, peer_ip: SocketAddr, block_locators: BlockLocators) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to update the peer locators. - // This `tx_block_sync_update_peer_locators.send()` call - // causes the `rx_block_sync_update_peer_locators.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_update_peer_locators.send((peer_ip, block_locators, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the request to advance with sync blocks. - pub async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to advance with sync blocks. - // This `tx_block_sync_advance_with_sync_blocks.send()` call - // causes the `rx_block_sync_advance_with_sync_blocks.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_advance_with_sync_blocks.send((peer_ip, blocks, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? 
- } -} - -#[derive(Debug)] -pub struct SyncReceiver { - pub rx_block_sync_advance_with_sync_blocks: - mpsc::Receiver<(SocketAddr, Vec>, oneshot::Sender>)>, - pub rx_block_sync_remove_peer: mpsc::Receiver, - pub rx_block_sync_update_peer_locators: mpsc::Receiver<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub rx_certificate_request: mpsc::Receiver<(SocketAddr, CertificateRequest)>, - pub rx_certificate_response: mpsc::Receiver<(SocketAddr, CertificateResponse)>, -} - -/// Initializes the sync channels. -pub fn init_sync_channels() -> (SyncSender, SyncReceiver) { - let (tx_block_sync_advance_with_sync_blocks, rx_block_sync_advance_with_sync_blocks) = - mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_remove_peer, rx_block_sync_remove_peer) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_update_peer_locators, rx_block_sync_update_peer_locators) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_request, rx_certificate_request) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_response, rx_certificate_response) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = SyncSender { - tx_block_sync_advance_with_sync_blocks, - tx_block_sync_remove_peer, - tx_block_sync_update_peer_locators, - tx_certificate_request, - tx_certificate_response, - }; - let receiver = SyncReceiver { - rx_block_sync_advance_with_sync_blocks, - rx_block_sync_remove_peer, - rx_block_sync_update_peer_locators, - rx_certificate_request, - rx_certificate_response, - }; - - (sender, receiver) -} diff --git a/node/bft/src/helpers/dag.rs b/node/bft/src/helpers/dag.rs index 53f149734f..babb62d16b 100644 --- a/node/bft/src/helpers/dag.rs +++ b/node/bft/src/helpers/dag.rs @@ -128,7 +128,9 @@ impl DAG { // Update the recently committed IDs. 
let is_new = self.recent_committed_ids.entry(certificate_round).or_default().insert(certificate_id); - if !is_new { + if is_new { + trace!("Got new commit for certificate {certificate_id} at round {certificate_round}"); + } else { //TODO (kaimast): return early here? trace!("Certificate {certificate_id} was already committed for round {certificate_round}"); } diff --git a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 7d9dd7f531..16a7d70fa9 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod cache; -pub use cache::*; - pub mod channels; pub use channels::*; @@ -34,16 +31,13 @@ pub use proposal::*; pub mod proposal_cache; pub use proposal_cache::*; -pub mod ready; -pub use ready::*; - -pub mod resolver; -pub use resolver::*; +mod ready; +pub(crate) use ready::*; pub mod signed_proposals; pub use signed_proposals::*; -pub mod storage; +mod storage; pub use storage::*; #[cfg(feature = "telemetry")] @@ -54,6 +48,15 @@ pub use telemetry::*; pub mod timestamp; pub use timestamp::*; +use anyhow::{Result, bail}; +#[cfg(feature = "locktick")] +use locktick::{ + LockGuard, + parking_lot::{RwLock, RwLockReadGuard}, +}; +#[cfg(not(feature = "locktick"))] +use parking_lot::{RwLock, RwLockReadGuard}; + /// Formats an ID into a truncated identifier (for logging purposes). pub fn fmt_id(id: impl ToString) -> String { let id = id.to_string(); @@ -63,3 +66,57 @@ pub fn fmt_id(id: impl ToString) -> String { } formatted_id } + +/// Helper struct to hold a reference to a callback struct. +pub struct CallbackHandle { + callback: RwLock>, +} + +impl Default for CallbackHandle { + /// By default, the handle holds no callback. + fn default() -> Self { + Self { callback: RwLock::new(None) } + } +} + +impl CallbackHandle { + /// Set a callback. Returns an error if a callback was already set. 
+ pub fn set(&self, callback: C) -> Result<()> { + let prev = self.callback.write().replace(callback); + + if prev.is_some() { + bail!("Callback was already set"); + } + + Ok(()) + } + + /// Get a cloned copy of the callback. + /// Useful when the callback will be used across await-boundaries. + #[inline] + pub fn get(&self) -> Option { + self.callback.read().clone() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(feature = "locktick")] + #[inline] + pub fn get_ref(&self) -> LockGuard>> { + self.callback.read() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(not(feature = "locktick"))] + #[inline] + pub fn get_ref(&self) -> RwLockReadGuard<'_, Option> { + self.callback.read() + } + + /// Remove the callback. + /// Used during shutdown to resolve circular dependencies between types. + pub fn clear(&self) { + let _ = self.callback.write().take(); + } +} diff --git a/node/bft/src/helpers/partition.rs b/node/bft/src/helpers/partition.rs index 809c9fb814..f171a2a27d 100644 --- a/node/bft/src/helpers/partition.rs +++ b/node/bft/src/helpers/partition.rs @@ -19,7 +19,7 @@ use snarkvm::{ prelude::{Network, ToBytes}, }; -use anyhow::{Result, bail}; +use anyhow::{Result, bail, ensure}; use sha2::{Digest, Sha256}; fn double_sha256(data: &[u8]) -> [u8; 32] { @@ -38,6 +38,8 @@ pub fn sha256d_to_u128(data: &[u8]) -> u128 { /// Returns the worker ID for the given transmission ID. pub fn assign_to_worker(transmission_id: impl Into>, num_workers: u8) -> Result { + ensure!(num_workers > 0, "Need at least one worker"); + // If there is only one worker, return it. if num_workers == 1 { return Ok(0); diff --git a/node/bft/src/helpers/ready.rs b/node/bft/src/helpers/ready.rs index b69d546aef..b0d04ed17e 100644 --- a/node/bft/src/helpers/ready.rs +++ b/node/bft/src/helpers/ready.rs @@ -52,6 +52,7 @@ impl Ready { } /// Returns `true` if the ready queue is empty. 
+ #[cfg(test)] pub fn is_empty(&self) -> bool { self.transmissions.is_empty() } diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index c92032d4c7..e1ae90dbc8 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -32,19 +32,18 @@ pub use snarkos_node_bft_storage_service as storage_service; pub mod helpers; mod bft; -pub use bft::*; +pub use bft::{BFT, BftCallback}; -mod gateway; -pub use gateway::*; - -mod primary; -pub use primary::*; +pub mod gateway; +pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; mod sync; -pub use sync::*; + +mod primary; +pub use primary::{Primary, PrimaryCallback}; mod worker; -pub use worker::*; +pub use worker::Worker; pub const CONTEXT: &str = "[MemoryPool]"; @@ -69,14 +68,3 @@ pub const MAX_WORKERS: u8 = 1; // worker(s) pub const PRIMARY_PING_IN_MS: u64 = 2 * MAX_BATCH_DELAY_IN_MS; // ms /// The interval at which each worker broadcasts a ping to every other node. pub const WORKER_PING_IN_MS: u64 = 4 * MAX_BATCH_DELAY_IN_MS; // ms - -/// A helper macro to spawn a blocking task. -#[macro_export] -macro_rules! spawn_blocking { - ($expr:expr) => { - match tokio::task::spawn_blocking(move || $expr).await { - Ok(value) => value, - Err(error) => Err(anyhow::anyhow!("[tokio::spawn_blocking] {error}")), - } - }; -} diff --git a/node/bft/src/lib.rsbeGfet.bck b/node/bft/src/lib.rsbeGfet.bck new file mode 100644 index 0000000000..c1b4b5b546 --- /dev/null +++ b/node/bft/src/lib.rsbeGfet.bck @@ -0,0 +1,76 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![forbid(unsafe_code)] +#![allow(clippy::blocks_in_conditions)] +#![allow(clippy::type_complexity)] + +#[macro_use] +extern crate async_trait; +#[macro_use] +extern crate tracing; + +#[cfg(feature = "metrics")] +extern crate snarkos_node_metrics as metrics; + +pub use snarkos_node_bft_events as events; +pub use snarkos_node_bft_ledger_service as ledger_service; +pub use snarkos_node_bft_storage_service as storage_service; + +pub mod helpers; + +mod bft; +pub use bft::{BFT, BftCallback}; + +<<<<<<< HEAD +pub mod gateway; +pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; +======= +mod gateway; +pub use gateway::Gateway; +>>>>>>> 1bddd475a (misc(bft): get rid of bft channels) + +mod primary; +pub use primary::*; + +mod sync; +pub use sync::*; + +mod worker; +pub use worker::*; + +pub const CONTEXT: &str = "[MemoryPool]"; + +/// The port on which the memory pool listens for incoming connections. +pub const MEMORY_POOL_PORT: u16 = 5000; // port + +/// The maximum number of milliseconds to wait before proposing a batch. +pub const MAX_BATCH_DELAY_IN_MS: u64 = 2500; // ms +/// The minimum number of seconds to wait before proposing a batch. +pub const MIN_BATCH_DELAY_IN_SECS: u64 = 1; // seconds +/// The maximum number of milliseconds to wait before timing out on a fetch. +pub const MAX_FETCH_TIMEOUT_IN_MS: u64 = 3 * MAX_BATCH_DELAY_IN_MS; // ms +/// The maximum number of seconds allowed for the leader to send their certificate. 
+pub const MAX_LEADER_CERTIFICATE_DELAY_IN_SECS: i64 = 2 * MAX_BATCH_DELAY_IN_MS as i64 / 1000; // seconds +/// The maximum number of seconds before the timestamp is considered expired. +pub const MAX_TIMESTAMP_DELTA_IN_SECS: i64 = 10; // seconds +/// The maximum number of workers that can be spawned. +pub const MAX_WORKERS: u8 = 1; // worker(s) + +/// The interval at which each primary broadcasts a ping to every other node. +/// Note: If this is updated, be sure to update `MAX_BLOCKS_BEHIND` to correspond properly. +pub const PRIMARY_PING_IN_MS: u64 = 2 * MAX_BATCH_DELAY_IN_MS; // ms +/// The interval at which each worker broadcasts a ping to every other node. +pub const WORKER_PING_IN_MS: u64 = 4 * MAX_BATCH_DELAY_IN_MS; // ms diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 41e0b50083..8975e60f9f 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -14,20 +14,16 @@ // limitations under the License. use crate::{ - Gateway, MAX_BATCH_DELAY_IN_MS, MAX_WORKERS, MIN_BATCH_DELAY_IN_SECS, PRIMARY_PING_IN_MS, - Sync, - Transport, WORKER_PING_IN_MS, Worker, events::{BatchPropose, BatchSignature, Event}, + gateway::{Gateway, GatewayPrimaryCallback, Transport}, helpers::{ - BFTSender, - PrimaryReceiver, - PrimarySender, + CallbackHandle, Proposal, ProposalCache, SignedProposals, @@ -35,11 +31,10 @@ use crate::{ assign_to_worker, assign_to_workers, fmt_id, - init_sync_channels, init_worker_channels, now, }, - spawn_blocking, + sync::{Sync, SyncCallback}, }; use snarkos_account::Account; use snarkos_node_bft_events::PrimaryPing; @@ -47,18 +42,21 @@ use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_sync::{BlockSync, DUMMY_SELF_IP, Ping}; use snarkvm::{ console::{ + network::ConsensusVersion, prelude::*, types::{Address, Field}, }, ledger::{ block::Transaction, + committee::Committee, narwhal::{BatchCertificate, BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - 
prelude::{ConsensusVersion, committee::Committee}, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use futures::stream::{FuturesUnordered, StreamExt}; use indexmap::{IndexMap, IndexSet}; @@ -80,11 +78,21 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{sync::OnceCell, task::JoinHandle}; /// A helper type for an optional proposed batch. pub type ProposedBatch = RwLock>>; +/// This callback trait allows listening to changes in the Primary, such as round advancement. +/// This is currently used by BFT. +#[async_trait::async_trait] +pub trait PrimaryCallback: Send + std::marker::Sync { + /// Notifies that a new round has started. + fn update_to_next_round(&self, current_round: u64) -> bool; + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// The primary logic of a node. /// AleoBFT adopts a primary-worker architecture as described in the Narwhal and Tusk paper (Section 4.2). #[derive(Clone)] @@ -99,8 +107,8 @@ pub struct Primary { ledger: Arc>, /// The workers. workers: Arc<[Worker]>, - /// The BFT sender. - bft_sender: Arc>>, + /// The primary callback (used by [`BFT`]). + primary_callback: Arc>>>, /// The batch proposal, if the primary is currently proposing a batch. proposed_batch: Arc>, /// The timestamp of the most recent proposed batch. @@ -119,9 +127,9 @@ impl Primary { /// The maximum number of unconfirmed transmissions to send to the primary. pub const MAX_TRANSMISSIONS_TOLERANCE: usize = BatchHeader::::MAX_TRANSMISSIONS_PER_BATCH * 2; - /// Initializes a new primary instance. + /// Initializes a new primary instance and starts the gateway. 
#[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -135,22 +143,77 @@ impl Primary { let gateway = Gateway::new(account, storage.clone(), ledger.clone(), ip, trusted_validators, dev)?; // Initialize the sync module. let sync = Sync::new(gateway.clone(), storage.clone(), ledger.clone(), block_sync); + let proposed_batch = Arc::new(ProposedBatch::default()); + + // Construct a map of the worker senders. + let mut worker_senders = IndexMap::new(); + + // Initialize the workers. + let mut workers = Vec::new(); + for id in 0..MAX_WORKERS { + // Construct the worker channels. + let (tx_worker, rx_worker) = init_worker_channels(); + // Construct the worker instance. + let worker = + Worker::new(id, Arc::new(gateway.clone()), storage.clone(), ledger.clone(), proposed_batch.clone()) + .with_context(|| "Failed to initialize worker")?; + // Run the worker instance. + worker.run(rx_worker); + // Add the worker to the list of workers. + workers.push(worker); + // Add the worker sender to the map. + worker_senders.insert(id, tx_worker); + } // Initialize the primary instance. - Ok(Self { + let obj = Self { sync, - gateway, + gateway: gateway.clone(), storage, ledger, - workers: Arc::from(vec![]), - bft_sender: Default::default(), - proposed_batch: Default::default(), + workers: Arc::from(workers), + primary_callback: Default::default(), + proposed_batch, latest_proposed_batch_timestamp: Default::default(), signed_proposals: Default::default(), handles: Default::default(), propose_lock: Default::default(), storage_mode, - }) + }; + + // Next, initialize the gateway. + let gateway_primary_callback = Arc::new(obj.clone()) as Arc>; + let gateway_sync_callback = Arc::new(obj.sync.clone()); + obj.gateway.run(worker_senders, gateway_primary_callback, Some(gateway_sync_callback)).await?; + + Ok(obj) + } + + /// Starts all remaining (background) tasks needed for the primary instance. 
+ pub async fn run( + &self, + ping: Option>>, + primary_callback: Option>>, + sync_callback: Option>>, + ) -> Result<()> { + info!("Starting the primary instance of the memory pool..."); + + // Set the BFT sender. + if let Some(callback) = primary_callback { + self.primary_callback.set(callback)?; + } + + // Next, initialize the sync module and sync the storage from ledger. + self.sync.initialize(sync_callback).await?; + // Next, load and process the proposal cache before running the sync module. + self.load_proposal_cache().await?; + // Next, run the sync module. + self.sync.run(ping).await?; + // Lastly, start the primary handlers. + // Note: This ensures the primary does not start communicating before syncing is complete. + self.start_handlers(); + + Ok(()) } /// Load the proposal cache file and update the Primary state with the stored data. @@ -193,65 +256,6 @@ impl Primary { } } - /// Run the primary instance. - pub async fn run( - &mut self, - ping: Option>>, - bft_sender: Option>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { - info!("Starting the primary instance of the memory pool..."); - - // Set the BFT sender. - if let Some(bft_sender) = &bft_sender { - // Set the BFT sender in the primary. - self.bft_sender.set(bft_sender.clone()).expect("BFT sender already set"); - } - - // Construct a map of the worker senders. - let mut worker_senders = IndexMap::new(); - // Construct a map for the workers. - let mut workers = Vec::new(); - // Initialize the workers. - for id in 0..MAX_WORKERS { - // Construct the worker channels. - let (tx_worker, rx_worker) = init_worker_channels(); - // Construct the worker instance. - let worker = Worker::new( - id, - Arc::new(self.gateway.clone()), - self.storage.clone(), - self.ledger.clone(), - self.proposed_batch.clone(), - )?; - // Run the worker instance. - worker.run(rx_worker); - // Add the worker to the list of workers. 
- workers.push(worker); - // Add the worker sender to the map. - worker_senders.insert(id, tx_worker); - } - // Set the workers. - self.workers = Arc::from(workers); - - // First, initialize the sync channels. - let (sync_sender, sync_receiver) = init_sync_channels(); - // Next, initialize the sync module and sync the storage from ledger. - self.sync.initialize(bft_sender).await?; - // Next, load and process the proposal cache before running the sync module. - self.load_proposal_cache().await?; - // Next, run the sync module. - self.sync.run(ping, sync_receiver).await?; - // Next, initialize the gateway. - self.gateway.run(primary_sender, worker_senders, Some(sync_sender)).await; - // Lastly, start the primary handlers. - // Note: This ensures the primary does not start communicating before syncing is complete. - self.start_handlers(primary_receiver); - - Ok(()) - } - /// Returns the current round. pub fn current_round(&self) -> u64 { self.storage.current_round() @@ -405,7 +409,7 @@ impl Primary { // Resend the batch proposal to the validator for signing. Some(peer_ip) => { let (gateway, event_, round) = (self.gateway.clone(), event.clone(), proposal.round()); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending batch proposal for round {round} to peer '{peer_ip}'"); // Resend the batch proposal to the peer. if gateway.send(peer_ip, event_).await.is_none() { @@ -432,17 +436,12 @@ impl Primary { // Ensure the primary has not proposed a batch for this round before. if self.storage.contains_certificate_in_round_from(round, self.gateway.account().address()) { // If a BFT sender was provided, attempt to advance the current round. - if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(self.current_round()).await { + if let Some(cb) = &*self.primary_callback.get_ref() { + match cb.update_to_next_round(self.current_round()) { // 'is_ready' is true if the primary is ready to propose a batch for the next round. 
- Ok(true) => (), // continue, + true => (), // continue, // 'is_ready' is false if the primary is not ready to propose a batch for the next round. - Ok(false) => return Ok(()), - // An error occurred while attempting to advance the current round. - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } + false => return Ok(()), } } debug!("Primary is safely skipping {}", format!("(round {round} was already certified)").dimmed()); @@ -571,14 +570,13 @@ impl Primary { } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) - } + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. // ConsensusVersion V8 Migration logic - @@ -677,15 +675,18 @@ impl Primary { // Prepare the previous batch certificate IDs. let previous_certificate_ids = previous_certificates.into_iter().map(|c| c.id()).collect(); // Sign the batch header and construct the proposal. 
- let (batch_header, proposal) = spawn_blocking!(BatchHeader::new( - &private_key, - round, - current_timestamp, - committee_id, - transmission_ids, - previous_certificate_ids, - &mut rand::thread_rng() - )) + let (batch_header, proposal) = task::spawn_blocking(move || { + BatchHeader::new( + &private_key, + round, + current_timestamp, + committee_id, + transmission_ids, + previous_certificate_ids, + &mut rand::thread_rng(), + ) + }) + .await .and_then(|batch_header| { Proposal::new(committee_lookback, batch_header.clone(), transmissions.clone()) .map(|proposal| (batch_header, proposal)) @@ -718,7 +719,7 @@ impl Primary { let BatchPropose { round: batch_round, batch_header } = batch_propose; // Deserialize the batch header. - let batch_header = spawn_blocking!(batch_header.deserialize_blocking())?; + let batch_header = task::spawn_blocking(|| batch_header.deserialize_blocking()).await?; // Ensure the round matches in the batch header. if batch_round != batch_header.round() { // Proceed to disconnect the validator. @@ -784,7 +785,7 @@ impl Primary { // Instead, rebroadcast the cached signature to the peer. if signed_round == batch_header.round() && signed_batch_id == batch_header.batch_id() { let gateway = self.gateway.clone(); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending a signature for a batch in round {batch_round} from '{peer_ip}'"); let event = Event::BatchSignature(BatchSignature::new(batch_header.batch_id(), signature)); // Resend the batch signature to the peer. @@ -848,8 +849,10 @@ impl Primary { // Ensure the batch header from the peer is valid. 
let (storage, header) = (self.storage.clone(), batch_header.clone()); - let missing_transmissions = - spawn_blocking!(storage.check_batch_header(&header, missing_transmissions, Default::default()))?; + let missing_transmissions = task::spawn_blocking(move || { + storage.check_batch_header(&header, missing_transmissions, Default::default()) + }) + .await?; // Inserts the missing transmissions into the workers. self.insert_missing_transmissions_into_workers(peer_ip, missing_transmissions.into_iter())?; @@ -874,14 +877,13 @@ impl Primary { (transmission_id, transmission) { // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) - } + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. // ConsensusVersion V8 Migration logic - @@ -942,7 +944,7 @@ impl Primary { let batch_id = batch_header.batch_id(); // Sign the batch ID. let account = self.gateway.account().clone(); - let signature = spawn_blocking!(account.sign(&[batch_id], &mut rand::thread_rng()))?; + let signature = task::spawn_blocking(move || account.sign(&[batch_id], &mut rand::thread_rng())).await?; // Ensure the proposal has not already been signed. // @@ -970,7 +972,7 @@ impl Primary { // Broadcast the signature back to the validator. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { let event = Event::BatchSignature(BatchSignature::new(batch_id, signature)); // Send the batch signature to the peer. 
if self_.gateway.send(peer_ip, event).await.is_some() { @@ -1015,7 +1017,7 @@ impl Primary { } let self_ = self.clone(); - let Some(proposal) = spawn_blocking!({ + let Some(proposal) = task::spawn_blocking(move || { // Acquire the write lock. let mut proposed_batch = self_.proposed_batch.write(); // Add the signature to the batch, and determine if the batch is ready to be certified. @@ -1063,7 +1065,7 @@ impl Primary { Some(proposal) => Ok(Some(proposal)), None => Ok(None), } - })? + }).await? else { return Ok(()); }; @@ -1190,16 +1192,7 @@ impl Primary { /// tries to move the the next round of batches. /// /// This function is called exactly once, in `Self::run()`. - fn start_handlers(&self, primary_receiver: PrimaryReceiver) { - let PrimaryReceiver { - mut rx_batch_propose, - mut rx_batch_signature, - mut rx_batch_certified, - mut rx_primary_ping, - mut rx_unconfirmed_solution, - mut rx_unconfirmed_transaction, - } = primary_receiver; - + fn start_handlers(&self) { // Start the primary ping sender. let self_ = self.clone(); self.spawn(async move { @@ -1209,7 +1202,7 @@ impl Primary { // Retrieve the block locators. let self__ = self_.clone(); - let block_locators = match spawn_blocking!(self__.sync.get_block_locators()) { + let block_locators = match task::spawn_blocking(move || self__.sync.get_block_locators()).await { Ok(block_locators) => block_locators, Err(e) => { warn!("Failed to retrieve block locators - {e}"); @@ -1256,39 +1249,6 @@ impl Primary { } }); - // Start the primary ping handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, primary_certificate)) = rx_primary_ping.recv().await { - // If the primary is not synced, then do not process the primary ping. - if self_.sync.is_synced() { - trace!("Processing new primary ping from '{peer_ip}'"); - } else { - trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - - // Spawn a task to process the primary certificate. 
- { - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) - else { - warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); - return; - }; - // Process the primary certificate. - let id = fmt_id(primary_certificate.id()); - let round = primary_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { - warn!("Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}"); - } - }); - } - } - }); - // Start the worker ping(s). let self_ = self.clone(); self.spawn(async move { @@ -1336,75 +1296,6 @@ impl Primary { } }); - // Start the proposed batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_propose)) = rx_batch_propose.recv().await { - // If the primary is not synced, then do not sign the batch. - if !self_.sync.is_synced() { - trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the proposed batch. - let self_ = self_.clone(); - tokio::spawn(async move { - // Process the batch proposal. - let round = batch_propose.round; - if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { - warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - - // Start the batch signature handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_signature)) = rx_batch_signature.recv().await { - // If the primary is not synced, then do not store the signature. - if !self_.sync.is_synced() { - trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Process the batch signature. 
- // Note: Do NOT spawn a task around this function call. Processing signatures from peers - // is a critical path, and we should only store the minimum required number of signatures. - // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), - // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. - let id = fmt_id(batch_signature.batch_id); - if let Err(e) = self_.process_batch_signature_from_peer(peer_ip, batch_signature).await { - warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); - } - } - }); - - // Start the certified batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_certificate)) = rx_batch_certified.recv().await { - // If the primary is not synced, then do not store the certificate. - if !self_.sync.is_synced() { - trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the batch certificate. - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the batch certificate. - let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { - warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); - return; - }; - // Process the batch certificate. - let id = fmt_id(batch_certificate.id()); - let round = batch_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { - warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - // This task periodically tries to move to the next round. // // Note: This is necessary to ensure that the primary is not stuck on a previous round @@ -1447,59 +1338,6 @@ impl Primary { } } }); - - // Start a handler to process new unconfirmed solutions. 
- let self_ = self.clone(); - self.spawn(async move { - while let Some((solution_id, solution, callback)) = rx_unconfirmed_solution.recv().await { - // Compute the checksum for the solution. - let Ok(checksum) = solution.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed solution"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker((solution_id, checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed solution"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed solution. - let result = worker.process_unconfirmed_solution(solution_id, solution).await; - // Send the result to the callback. - callback.send(result).ok(); - }); - } - }); - - // Start a handler to process new unconfirmed transactions. - let self_ = self.clone(); - self.spawn(async move { - while let Some((transaction_id, transaction, callback)) = rx_unconfirmed_transaction.recv().await { - trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); - // Compute the checksum for the transaction. - let Ok(checksum) = transaction.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed transaction"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed transaction"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed transaction. - let result = worker.process_unconfirmed_transaction(transaction_id, transaction).await; - // Send the result to the callback. 
- callback.send(result).ok(); - }); - } - }); } /// Checks if the proposed batch is expired, and clears the proposed batch if it has expired. @@ -1540,14 +1378,8 @@ impl Primary { // Attempt to advance to the next round. if current_round < next_round { // If a BFT sender was provided, send the current round to the BFT. - let is_ready = if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(current_round).await { - Ok(is_ready) => is_ready, - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } - } + let is_ready = if let Some(cb) = self.primary_callback.get() { + cb.update_to_next_round(current_round) } // Otherwise, handle the Narwhal case. else { @@ -1631,15 +1463,15 @@ impl Primary { let transmissions = transmissions.into_iter().collect::>(); // Store the certified batch. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, transmissions, Default::default()))?; + task::spawn_blocking(move || storage.insert_certificate(certificate_, transmissions, Default::default())) + .await?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { + if let Some(cb) = self.primary_callback.get() { // Await the callback to continue. - if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate.clone()).await { - warn!("Failed to update the BFT DAG from primary - {e}"); - return Err(e); - }; + cb.add_new_certificate(certificate.clone()) + .await + .with_context(|| "Failed to add new certificate from primary")?; } // Broadcast the certified batch to all validators. self.gateway.broadcast(Event::BatchCertified(certificate.clone().into())); @@ -1718,15 +1550,14 @@ impl Primary { if !self.storage.contains_certificate(certificate.id()) { // Store the batch certificate. 
let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, missing_transmissions, Default::default()))?; + task::spawn_blocking(move || { + storage.insert_certificate(certificate_, missing_transmissions, Default::default()) + }) + .await?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { - // Send the certificate to the BFT. - if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate).await { - warn!("Failed to update the BFT DAG from sync: {e}"); - return Err(e); - }; + if let Some(cb) = self.primary_callback.get() { + cb.add_new_certificate(certificate).await.with_context(|| "Failed to update the DAG from sync")?; } } Ok(()) @@ -1931,16 +1762,20 @@ impl Primary { impl Primary { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the primary..."); + // Remove the callback. + self.primary_callback.clear(); + // Stop syncing. + self.sync.shut_down().await; // Shut down the workers. self.workers.iter().for_each(|worker| worker.shut_down()); // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Save the current proposal cache to disk. 
let proposal_cache = { let proposal = self.proposed_batch.write().take(); @@ -1957,8 +1792,143 @@ impl Primary { } } +/// Handle events from the Gateway +#[async_trait::async_trait] +impl GatewayPrimaryCallback for Primary { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>) { + // If the primary is not synced, then do not process the primary ping. + if self.sync.is_synced() { + trace!("Processing new primary ping from '{peer_ip}'"); + } else { + trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + + // Spawn a task to process the primary certificate. + { + let self_ = self.clone(); + task::spawn(async move { + // Deserialize the primary certificate in the primary ping. + let Ok(primary_certificate) = task::spawn_blocking(|| primary_certificate.deserialize_blocking()).await + else { + warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); + return; + }; + // Process the primary certificate. + let id = fmt_id(primary_certificate.id()); + let round = primary_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { + warn!( + "Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}" + ); + } + }); + } + } + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose) { + // If the primary is not synced, then do not sign the batch. + if !self.sync.is_synced() { + trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the proposed batch. + let self_ = self.clone(); + task::spawn(async move { + // Process the batch proposal. 
+ let round = batch_propose.round; + if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { + warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); + } + }); + } + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature) { + // If the primary is not synced, then do not store the signature. + if !self.sync.is_synced() { + trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Process the batch signature. + // Note: Do NOT spawn a task around this function call. Processing signatures from peers + // is a critical path, and we should only store the minimum required number of signatures. + // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), + // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. + let id = fmt_id(batch_signature.batch_id); + if let Err(e) = self.process_batch_signature_from_peer(peer_ip, batch_signature).await { + warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); + } + } + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>) { + // If the primary is not synced, then do not store the certificate. + if !self.sync.is_synced() { + trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the batch certificate. + let self_ = self.clone(); + task::spawn(async move { + // Deserialize the batch certificate. + let Ok(batch_certificate) = task::spawn_blocking(|| batch_certificate.deserialize_blocking()).await else { + warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); + return; + }; + // Process the batch certificate. 
+ let id = fmt_id(batch_certificate.id()); + let round = batch_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { + warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); + } + }); + } +} + +/// Invoked by the mempool ("Consensus"). +impl Primary { + pub async fn process_unconfirmed_solution( + &self, + solution_id: SolutionID, + solution: Data>, + ) -> Result<()> { + // Compute the checksum for the solution. + let Ok(checksum) = solution.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed solution"); + }; + + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker((solution_id, checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed solution"); + }; + + // Wait for the worker to process the unconfirmed solution. + self.workers[worker_id as usize].process_unconfirmed_solution(solution_id, solution).await + } + + pub async fn process_unconfirmed_transaction( + &self, + transaction_id: N::TransactionID, + transaction: Data>, + ) -> Result<()> { + trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); + // Compute the checksum for the transaction. + let Ok(checksum) = transaction.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed transaction"); + }; + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed transaction"); + }; + + // Wait for the worker to process the unconfirmed transaction. 
+ self.workers[worker_id as usize].process_unconfirmed_transaction(transaction_id, transaction).await + } +} + #[cfg(test)] mod tests { + use std::net::{Ipv4Addr, SocketAddrV4}; + use super::*; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -1995,7 +1965,7 @@ mod tests { } // Returns a primary and a list of accounts in the configured committee. - fn primary_with_committee( + async fn primary_with_committee( account_index: usize, accounts: &[(SocketAddr, Account)], committee: Committee, @@ -2004,11 +1974,16 @@ mod tests { let ledger = Arc::new(MockLedgerService::new_at_height(committee, height)); let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 10); + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the primary. let account = accounts[account_index].1.clone(); let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut primary = - Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).unwrap(); + Primary::new(account, storage, ledger, block_sync, Some(any_addr), &[], StorageMode::Test(None), None) + .await + .unwrap(); // Construct a worker instance. 
primary.workers = Arc::from([Worker::new( @@ -2026,7 +2001,7 @@ mod tests { primary } - fn primary_without_handlers( + async fn primary_without_handlers( rng: &mut TestRng, ) -> (Primary, Vec<(SocketAddr, Account)>) { let (accounts, committee) = sample_committee(rng); @@ -2035,7 +2010,8 @@ mod tests { &accounts, committee, CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V1).unwrap(), - ); + ) + .await; (primary, accounts) } @@ -2233,10 +2209,11 @@ mod tests { } } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2254,10 +2231,11 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_no_transmissions() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2267,11 +2245,12 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_in_round() { let round = 3; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Fill primary storage. 
store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2292,12 +2271,13 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_skip_transmissions_from_previous_certificates() { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; @@ -2364,6 +2344,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_over_spend_limit() { let mut rng = TestRng::default(); @@ -2375,7 +2356,8 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2400,10 +2382,11 @@ mod tests { assert_eq!(primary.workers().iter().map(|worker| worker.transmissions().len()).sum::(), 3); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2439,10 +2422,11 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_when_not_synced() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. 
let round = 1; @@ -2476,11 +2460,12 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2516,10 +2501,11 @@ mod tests { primary.process_batch_propose_from_peer(peer_ip, (*proposal.batch_header()).clone().into()).await.unwrap(); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_wrong_round() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2558,11 +2544,12 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round_wrong_round() { let round = 4; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2604,11 +2591,12 @@ mod tests { } /// Tests that the minimum batch delay is enforced as expected, i.e., that proposals with timestamps that are too close to the previous proposal are rejected. + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_with_past_timestamp() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. 
let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2654,6 +2642,7 @@ mod tests { } /// Check that proposals rejected that have timestamps older than the previous proposal. + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_over_spend_limit() { let mut rng = TestRng::default(); @@ -2665,13 +2654,15 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; let primary_v5 = primary_with_committee( 1, &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V5).unwrap(), - ); + ) + .await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2717,11 +2708,12 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal_lock() { let round = 3; let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2750,11 +2742,12 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate previous certificates. 
let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2780,10 +2773,11 @@ mod tests { assert!(primary.proposed_batch.read().as_ref().unwrap().round() > primary.current_round()); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2816,11 +2810,12 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer_in_round() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. @@ -2855,10 +2850,11 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_no_quorum() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2890,11 +2886,12 @@ mod tests { assert_eq!(primary.current_round(), round); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_in_round_no_quorum() { let round = 7; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. 
@@ -2928,12 +2925,13 @@ mod tests { assert_eq!(primary.current_round(), round); } + #[tracing_test::traced_test] #[tokio::test] async fn test_insert_certificate_with_aborted_transmissions() { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync.rs similarity index 88% rename from node/bft/src/sync/mod.rs rename to node/bft/src/sync.rs index 85ad6c0a7e..9c30c4367e 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync.rs @@ -14,24 +14,28 @@ // limitations under the License. use crate::{ - Gateway, MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, - Transport, - events::DataBlocks, - helpers::{BFTSender, Pending, Storage, SyncReceiver, fmt_id, max_redundant_requests}, - spawn_blocking, + events::{CertificateRequest, CertificateResponse, DataBlocks, Event}, + gateway::{Gateway, GatewaySyncCallback, Transport}, + helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, + ledger_service::LedgerService, }; -use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncRequest, locators::BlockLocators}; use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, - prelude::{cfg_into_iter, cfg_iter}, + utilities::{ + LoggableError, + cfg_into_iter, + cfg_iter, + spawn_blocking, + task::{self, JoinHandle}, + }, }; -use anyhow::{Result, anyhow, bail}; +use anyhow::{Context, Result, anyhow, bail}; use indexmap::IndexMap; #[cfg(feature = "locktick")] use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; @@ -44,19 +48,29 @@ use std::{ future::Future, 
net::SocketAddr, sync::Arc, - time::Duration, + time::{Duration, Instant}, }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; use tokio::{ - sync::{OnceCell, oneshot}, - task::JoinHandle, + sync::oneshot, + time::{sleep, timeout}, }; +/// This callback trait allows listening to synchronization updates, such as discovering new `BatchCertificate`s. +/// This is currently used by BFT. +#[async_trait::async_trait] +pub trait SyncCallback: Send + std::marker::Sync { + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()>; + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// Block synchronization logic for validators. /// /// Synchronization works differently for nodes that act as validators in AleoBFT; -/// In the common case, validators generate blocks after receiving an anchor block that has been accepted +/// In the common case, validators generate blocks after receiving an anchor certificate that has been accepted /// by a supermajority of the committee instead of fetching entire blocks from other nodes. /// However, if a validator does not have an up-to-date DAG, it might still fetch entire blocks from other nodes. /// @@ -77,8 +91,8 @@ pub struct Sync { block_sync: Arc>, /// The pending certificates queue. pending: Arc, BatchCertificate>>, - /// The BFT sender. - bft_sender: Arc>>, + /// The sync callback (used by [`BFT`]). + sync_callback: Arc>>>, /// Handles to the spawned background tasks. handles: Arc>>>, /// The response lock. @@ -96,6 +110,8 @@ pub struct Sync { } impl Sync { + const SYNC_INTERVAL: Duration = Duration::from_millis(PRIMARY_PING_IN_MS); + /// Initializes a new sync instance. 
pub fn new( gateway: Gateway, @@ -110,7 +126,7 @@ impl Sync { ledger, block_sync, pending: Default::default(), - bft_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), response_lock: Default::default(), sync_lock: Default::default(), @@ -119,10 +135,10 @@ impl Sync { } /// Initializes the sync module and sync the storage with the ledger at bootup. - pub async fn initialize(&self, bft_sender: Option>) -> Result<()> { - // If a BFT sender was provided, set it. - if let Some(bft_sender) = bft_sender { - self.bft_sender.set(bft_sender).expect("BFT sender already set in gateway"); + pub async fn initialize(&self, sync_callback: Option>>) -> Result<()> { + // If a callback was provided, set it. + if let Some(callback) = sync_callback { + self.sync_callback.set(callback)?; } info!("Syncing storage with the ledger..."); @@ -162,7 +178,7 @@ impl Sync { /// /// When this function returns successfully, the sync module will have spawned background tasks /// that fetch blocks from other validators. - pub async fn run(&self, ping: Option>>, sync_receiver: SyncReceiver) -> Result<()> { + pub async fn run(&self, ping: Option>>) -> Result<()> { info!("Starting the sync module..."); // Start the block sync loop. @@ -172,20 +188,29 @@ impl Sync { // Ideally, a node does not consider itself synced when it has not received // any block locators from peers. However, in the initial bootup of validators, // this needs to happen, so we use this additional sleep as a grace period. - tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + + let mut last_update = Instant::now(); loop { - // Sleep briefly to avoid triggering spam detection. 
- tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + // Make sure we do not sync too often + let now = Instant::now(); + let elapsed = now.saturating_duration_since(last_update); + let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); + + if !sleep_time.is_zero() { + sleep(sleep_time).await; + } let new_blocks = self_.try_block_sync().await; if new_blocks { if let Some(ping) = &ping { match self_.get_block_locators() { Ok(locators) => ping.update_block_locators(locators), - Err(err) => error!("Failed to update block locators: {err}"), + Err(err) => err.log_error("Failed to update block locators"), } } } + last_update = now; } }); @@ -194,86 +219,14 @@ impl Sync { self.spawn(async move { loop { // Sleep briefly. - tokio::time::sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; + sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; // Remove the expired pending transmission requests. let self__ = self_.clone(); - let _ = spawn_blocking!({ + spawn_blocking(move || { self__.pending.clear_expired_callbacks(); - Ok(()) - }); - } - }); - - /* Set up callbacks for events from the Gateway */ - - // Retrieve the sync receiver. - let SyncReceiver { - mut rx_block_sync_advance_with_sync_blocks, - mut rx_block_sync_remove_peer, - mut rx_block_sync_update_peer_locators, - mut rx_certificate_request, - mut rx_certificate_response, - } = sync_receiver; - - // Process the block sync request to advance with sync blocks. - // Each iteration of this loop is triggered by an incoming [`BlockResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::advance_with_sync_blocks()`], - // which calls [`tx_block_sync_advance_with_sync_blocks.send()`], - // which causes the `rx_block_sync_advance_with_sync_blocks.recv()` call below to return. 
- let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, blocks, callback)) = rx_block_sync_advance_with_sync_blocks.recv().await { - callback.send(self_.advance_with_sync_blocks(peer_ip, blocks).await).ok(); - } - }); - - // Process the block sync request to remove the peer. - let self_ = self.clone(); - self.spawn(async move { - while let Some(peer_ip) = rx_block_sync_remove_peer.recv().await { - self_.remove_peer(peer_ip); - } - }); - - // Process each block sync request to update peer locators. - // Each iteration of this loop is triggered by an incoming [`PrimaryPing`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::update_peer_locators()`], - // which calls [`tx_block_sync_update_peer_locators.send()`], - // which causes the `rx_block_sync_update_peer_locators.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, locators, callback)) = rx_block_sync_update_peer_locators.recv().await { - let self_clone = self_.clone(); - tokio::spawn(async move { - callback.send(self_clone.update_peer_locators(peer_ip, locators)).ok(); - }); - } - }); - - // Process each certificate request. - // Each iteration of this loop is triggered by an incoming [`CertificateRequest`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_request.send()`], - // which causes the `rx_certificate_request.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_request)) = rx_certificate_request.recv().await { - self_.send_certificate_response(peer_ip, certificate_request); - } - }); - - // Process each certificate response. 
- // Each iteration of this loop is triggered by an incoming [`CertificateResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_response.send()`], - // which causes the `rx_certificate_response.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_response)) = rx_certificate_response.recv().await { - self_.finish_certificate_request(peer_ip, certificate_response); + }) + .await; } }); @@ -295,6 +248,9 @@ impl Sync { self.send_block_requests(sync_peers, requests).await; } + // Wait for updates or a timeout. + let _ = timeout(Self::SYNC_INTERVAL, self.block_sync.wait_for_update()).await; + // Do not attempt to sync if there are no blocks to sync. // This prevents redundant log messages and performing unnecessary computation. if !self.block_sync.can_block_sync() { @@ -311,27 +267,24 @@ impl Sync { match self.try_advancing_block_synchronization().await { Ok(new_blocks) => new_blocks, Err(err) => { - error!("Block synchronization failed - {err}"); + err.log_error("Block synchronization failed"); false } } } + + /// Test-only. Manually add peer locators. + #[cfg(test)] + pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { + self.update_peer_locators(peer_ip, locators) + } } // Callbacks used when receiving messages from the Gateway -impl Sync { +impl GatewaySyncCallback for Sync { /// We received a block response and can (possibly) advance synchronization. - async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Verify that the response is valid and add it to block sync. - self.block_sync.insert_block_responses(peer_ip, blocks)?; - - // Try to process responses stored in BlockSync. 
- // Note: Do not call `self.block_sync.try_advancing_block_synchronziation` here as it will process - // and remove any completed requests, which means the call to `sync_storage_with_blocks` will not process - // them as expected. - self.try_advancing_block_synchronization().await?; - - Ok(()) + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { + self.block_sync.insert_block_responses(peer_ip, blocks) } /// We received new peer locators during a Ping. @@ -344,9 +297,30 @@ impl Sync { self.block_sync.remove_peer(&peer_ip); } - #[cfg(test)] - pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { - self.update_peer_locators(peer_ip, locators) + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { + // Attempt to retrieve the certificate. + if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { + // Send the certificate response to the peer. + let self_ = self.clone(); + task::spawn(async move { + let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; + }); + } + } + + /// Handles the incoming certificate response. + /// This method ensures the certificate response is well-formed and matches the certificate ID. + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { + let certificate = response.certificate; + // Check if the peer IP exists in the pending queue for the given certificate ID. + let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); + // If the peer IP exists, finish the pending request. + if exists { + // TODO: Validate the certificate. + // Remove the certificate ID from the pending queue. 
+ self.pending.remove(certificate.id(), Some(certificate)); + } } } @@ -429,12 +403,9 @@ impl Sync { .flatten() .collect::>(); - // If a BFT sender was provided, send the certificates to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { - // Await the callback to continue. - if let Err(e) = bft_sender.tx_sync_bft_dag_at_bootup.send(certificates).await { - bail!("Failed to update the BFT DAG from sync: {e}"); - } + // If a callback was provided, send the certificates to it. + if let Some(cb) = self.sync_callback.get() { + cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; } self.block_sync.set_sync_height(block_height); @@ -582,7 +553,7 @@ impl Sync { if within_gc { info!("Finished catching up with the network. Switching back to BFT sync."); if let Err(err) = self.sync_storage_with_ledger_at_bootup().await { - error!("BFT sync (with bootup routine) failed - {err}"); + err.log_error("BFT sync (with bootup routine) failed"); } } @@ -598,7 +569,7 @@ impl Sync { let _lock = self.sync_lock.lock().await; let self_ = self.clone(); - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Check the next block. self_.ledger.check_next_block(&block)?; // Attempt to advance to the next block. @@ -613,7 +584,7 @@ impl Sync { Ok(()) }) - .await? + .await } /// Advances the ledger by the given block and updates the storage accordingly. @@ -666,13 +637,12 @@ impl Sync { // Sync the BFT DAG with the certificates. for certificate in certificates { - // If a BFT sender was provided, send the certificate to the BFT. + // If a callback was provided, send the certificate to it. // For validators, BFT spawns a receiver task in `BFT::start_handlers`. - if let Some(bft_sender) = self.bft_sender.get() { - // Await the callback to continue. 
- if let Err(err) = bft_sender.send_sync_bft(certificate).await { - bail!("Failed to sync certificate - {err}"); - }; + if let Some(cb) = self.sync_callback.get() { + cb.add_new_certificate(certificate) + .await + .with_context(|| "Failed to sync certificate - {err}")?; } } } @@ -841,11 +811,6 @@ impl Sync { self.block_sync.is_block_synced() } - /// Returns the number of blocks the node is behind the greatest peer height. - pub fn num_blocks_behind(&self) -> Option { - self.block_sync.num_blocks_behind() - } - /// Returns the current block locators of the node. pub fn get_block_locators(&self) -> Result> { self.block_sync.get_block_locators() @@ -889,56 +854,32 @@ impl Sync { } // Wait for the certificate to be fetched. // TODO (raychu86): Consider making the timeout dynamic based on network traffic and/or the number of validators. - match tokio::time::timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await { - // If the certificate was fetched, return it. - Ok(result) => Ok(result?), - // If the certificate was not fetched, return an error. - Err(e) => bail!("Unable to fetch certificate {} - (timeout) {e}", fmt_id(certificate_id)), - } - } - - /// Handles the incoming certificate request. - fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { - // Attempt to retrieve the certificate. - if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { - // Send the certificate response to the peer. - let self_ = self.clone(); - tokio::spawn(async move { - let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; - }); - } - } + let cert = timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver) + .await + .with_context(|| format!("Unable to fetch certificate {} (timeout)", fmt_id(certificate_id)))? 
+ .with_context(|| format!("Unable to fetch certificate {} (timeout)", fmt_id(certificate_id)))?; - /// Handles the incoming certificate response. - /// This method ensures the certificate response is well-formed and matches the certificate ID. - fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { - let certificate = response.certificate; - // Check if the peer IP exists in the pending queue for the given certificate ID. - let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); - // If the peer IP exists, finish the pending request. - if exists { - // TODO: Validate the certificate. - // Remove the certificate ID from the pending queue. - self.pending.remove(certificate.id(), Some(certificate)); - } + Ok(cert) } } impl Sync { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the sync module..."); + // Remove the callback. + self.sync_callback.clear(); // Acquire the response lock. let _lock = self.response_lock.lock().await; // Acquire the sync lock. let _lock = self.sync_lock.lock().await; - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + // Abort all running tasks. + self.handles.lock().drain(..).for_each(|handle| handle.abort()); } } @@ -950,6 +891,7 @@ mod tests { use snarkos_account::Account; use snarkos_node_sync::BlockSync; + use snarkos_utilities::SimpleStoppable; use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -1001,7 +943,7 @@ mod tests { // Initialize the ledger with the genesis block. let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. 
- let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample 5 rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { @@ -1174,7 +1116,7 @@ mod tests { // Initialize the syncing ledger. let syncing_ledger = Arc::new(CoreLedgerService::new( CurrentLedger::load(genesis, StorageMode::new_test(None)).unwrap(), - Default::default(), + SimpleStoppable::new(), )); // Initialize the gateway. let gateway = Gateway::new(account.clone(), storage.clone(), syncing_ledger.clone(), None, &[], None)?; @@ -1226,7 +1168,7 @@ mod tests { // Initialize the ledger with the genesis block. let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. - let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { // Initialize the committee. 
diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 04c62ab6cd..c134e0709c 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -16,22 +16,27 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, MAX_WORKERS, - ProposedBatch, - Transport, events::{Event, TransmissionRequest, TransmissionResponse}, + gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, - spawn_blocking, + ledger_service::LedgerService, + primary::ProposedBatch, }; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkvm::{ - console::prelude::*, + console::{network::Network, prelude::Read}, ledger::{ block::Transaction, narwhal::{BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, + utilities::{ + FromBytes, + task::{self, JoinHandle}, + }, }; +use anyhow::{Context, Result, bail, ensure}; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -40,7 +45,7 @@ use locktick::parking_lot::{Mutex, RwLock}; use parking_lot::{Mutex, RwLock}; use rand::seq::IteratorRandom; use std::{future::Future, net::SocketAddr, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle, time::timeout}; +use tokio::{sync::oneshot, time::timeout}; /// A worker's main role is maintaining a queue of verified ("ready") transmissions, /// which will eventually be fetched by the primary when the primary generates a new batch. @@ -267,7 +272,7 @@ impl Worker { } // Attempt to fetch the transmission from the peer. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { // Send a transmission request to the peer. match self_.send_transmission_request(peer_ip, transmission_id).await { // If the transmission was fetched, then process it. 
@@ -322,7 +327,7 @@ impl Worker { if tx.is_execute() { let self_ = self.clone(); let tx_ = tx.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = self_.ledger.check_transaction_basic(tx_id, tx_).await; }); } @@ -390,12 +395,11 @@ impl Worker { bail!("Transaction '{}.{}' already exists.", fmt_id(transaction_id), fmt_id(checksum).dimmed()); } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?), - } - })?; + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)), + }) + .await?; // Check that the transaction is well-formed and unique. self.ledger.check_transaction_basic(transaction_id, transaction).await?; @@ -426,10 +430,7 @@ impl Worker { // Remove the expired pending certificate requests. let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.pending.clear_expired_callbacks(); - Ok(()) - }); + task::spawn_blocking(move || self__.pending.clear_expired_callbacks()).await; } }); @@ -455,10 +456,7 @@ impl Worker { while let Some((peer_ip, transmission_response)) = rx_transmission_response.recv().await { // Process the transmission response. let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.finish_transmission_request(peer_ip, transmission_response); - Ok(()) - }); + task::spawn_blocking(move || self__.finish_transmission_request(peer_ip, transmission_response)).await; } }); } @@ -498,12 +496,12 @@ impl Worker { ); } // Wait for the transmission to be fetched. - match timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await { - // If the transmission was fetched, return it. 
- Ok(result) => Ok((transmission_id, result?)), - // If the transmission was not fetched, return an error. - Err(e) => bail!("Unable to fetch transmission - (timeout) {e}"), - } + let transmission = timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver) + .await + .with_context(|| "Unable to fetch transmission from peer - (timeout)")? + .with_context(|| "Unable to fetch transmission from peer")?; + + Ok((transmission_id, transmission)) } /// Handles the incoming transmission response. @@ -532,7 +530,7 @@ impl Worker { if let Some(transmission) = self.get_transmission(transmission_id) { // Send the transmission response to the peer. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { self_.gateway.send(peer_ip, Event::TransmissionResponse((transmission_id, transmission).into())).await; }); } @@ -540,14 +538,14 @@ impl Worker { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the worker. pub(crate) fn shut_down(&self) { trace!("Shutting down worker {}...", self.id); - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + // Abort and discard the tasks. 
+ self.handles.lock().drain(..).for_each(|handle| handle.abort()); } } @@ -558,19 +556,25 @@ mod tests { use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ - console::{network::Network, types::Field}, + console::{ + network::{ConsensusVersion, Network}, + types::{Address, Field}, + }, ledger::{ block::Block, committee::Committee, narwhal::{BatchCertificate, Subdag, Transmission, TransmissionID}, snarkvm_ledger_test_helpers::sample_execution_transaction_with_fee, }, - prelude::Address, + prelude::{Itertools, Uniform}, + utilities::TestRng, }; + use anyhow::anyhow; use bytes::Bytes; use indexmap::IndexMap; use mockall::mock; + use rand::Rng; use std::{io, ops::Range}; type CurrentNetwork = snarkvm::prelude::MainnetV0; @@ -926,7 +930,7 @@ mod tests { for i in 1..=num_flood_requests { let worker_ = worker.clone(); let peer_ip = peer_ips.pop().unwrap(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -946,7 +950,7 @@ mod tests { // Flood the pending queue with transmission requests again, this time to a single peer for i in 1..=num_flood_requests { let worker_ = worker.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(first_peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -999,12 +1003,15 @@ mod tests { mod prop_tests { use super::*; use crate::Gateway; + use snarkos_node_bft_ledger_service::MockLedgerService; use snarkvm::{ console::account::Address, ledger::committee::{Committee, MIN_VALIDATOR_STAKE}, + prelude::TestRng, }; + use rand::Rng; use test_strategy::proptest; type CurrentNetwork = snarkvm::prelude::MainnetV0; diff --git a/node/bft/tests/bft_e2e.rs b/node/bft/tests/bft_e2e.rs index 5f2b0baeeb..042110c60f 100644 --- a/node/bft/tests/bft_e2e.rs +++ 
b/node/bft/tests/bft_e2e.rs @@ -39,7 +39,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -60,7 +61,8 @@ async fn test_resync() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: false, - }); + }) + .await; network.start().await; // Let the nodes advance through the rounds. @@ -77,7 +79,8 @@ async fn test_resync() { fire_transmissions: None, log_level: None, log_connections: false, - }); + }) + .await; spare_network.start().await; for i in 1..N { @@ -92,6 +95,7 @@ async fn test_resync() { deadline!(Duration::from_secs(20), move || { network_clone.is_round_reached(RECOVERY_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_threshold() { // Start N nodes but don't connect them. @@ -106,7 +110,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -145,6 +150,7 @@ async fn test_quorum_threshold() { deadline!(Duration::from_secs(20), move || { network.is_round_reached(TARGET_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_break() { // Start N nodes, connect them and start the cannons for each. @@ -158,7 +164,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -194,7 +201,8 @@ async fn test_leader_election_consistency() { // Set this to Some(0..=4) to see the logs. 
log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Wait for starting round to be reached @@ -246,7 +254,8 @@ async fn test_transient_break() { // Set this to Some(0..=4) to see the logs. log_level: Some(6), log_connections: false, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index 3af72f1da7..aa59023d81 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -18,16 +18,13 @@ use crate::common::{ TranslucentLedgerService, utils::{fire_unconfirmed_solutions, fire_unconfirmed_transactions, initialize_logger}, }; + use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - MAX_BATCH_DELAY_IN_MS, - MEMORY_POOL_PORT, - Primary, - helpers::{PrimarySender, Storage, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -96,8 +93,6 @@ pub struct TestValidator { pub id: u16, /// The primary instance. When the BFT is enabled this is a clone of the BFT primary. pub primary: Primary, - /// The channel sender of the primary. - pub primary_sender: Option>, /// The BFT instance. This is only set if the BFT is enabled. pub bft: OnceLock>, /// The tokio handles of all long-running tasks associated with the validator (incl. cannons). 
@@ -108,9 +103,8 @@ pub type CurrentLedger = Ledger> impl TestValidator { pub fn fire_transmissions(&mut self, interval_ms: u64) { - let solution_handle = fire_unconfirmed_solutions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); - let transaction_handle = - fire_unconfirmed_transactions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); + let solution_handle = fire_unconfirmed_solutions(self.primary.clone(), self.id, interval_ms); + let transaction_handle = fire_unconfirmed_transactions(self.primary.clone(), self.id, interval_ms); self.handles.lock().push(solution_handle); self.handles.lock().push(transaction_handle); @@ -133,7 +127,7 @@ impl TestValidator { impl TestNetwork { // Creates a new test network with the given configuration. - pub fn new(config: TestNetworkConfig) -> Self { + pub async fn new(config: TestNetworkConfig) -> Self { let mut rng = TestRng::default(); if let Some(log_level) = config.log_level { @@ -159,7 +153,7 @@ impl TestNetwork { for (id, account) in accounts.into_iter().enumerate() { let gen_ledger = genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), &mut rng); - let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())); + let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())); let storage = Storage::new( ledger.clone(), Arc::new(BFTMemoryService::new()), @@ -173,11 +167,12 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), &[], StorageMode::new_test(None), None, ) + .await .unwrap(); (bft.primary().clone(), Some(bft)) } else { @@ -186,22 +181,18 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), &[], 
StorageMode::new_test(None), None, ) + .await .unwrap(); (primary, None) }; - let test_validator = TestValidator { - id: id as u16, - primary, - primary_sender: None, - bft: OnceLock::new(), - handles: Default::default(), - }; + let test_validator = + TestValidator { id: id as u16, primary, bft: OnceLock::new(), handles: Default::default() }; if let Some(bft) = bft { assert!(test_validator.bft.set(bft).is_ok()); } @@ -214,19 +205,16 @@ impl TestNetwork { // Starts each node in the network. pub async fn start(&mut self) { for validator in self.validators.values_mut() { - let (primary_sender, primary_receiver) = init_primary_channels(); - validator.primary_sender = Some(primary_sender.clone()); - // let ledger_service = validator.primary.ledger().clone(); // let sync = BlockSync::new(BlockSyncMode::Gateway, ledger_service); // sync.try_block_sync(validator.primary.gateway()).await.unwrap(); if let Some(bft) = validator.bft.get_mut() { // Setup the channels and start the bft. - bft.run(None, None, primary_sender, primary_receiver).await.unwrap(); + bft.run(None, None).await.unwrap(); } else { // Setup the channels and start the primary. - validator.primary.run(None, None, primary_sender, primary_receiver).await.unwrap(); + validator.primary.run(None, None, None).await.unwrap(); } if let Some(interval_ms) = self.config.fire_transmissions { diff --git a/node/bft/tests/common/utils.rs b/node/bft/tests/common/utils.rs index 8d18b20592..27767dc8a5 100644 --- a/node/bft/tests/common/utils.rs +++ b/node/bft/tests/common/utils.rs @@ -14,14 +14,11 @@ // limitations under the License. 
use crate::common::{CurrentNetwork, TranslucentLedgerService, primary}; + use snarkos_account::Account; -use snarkos_node_bft::{ - Gateway, - Worker, - helpers::{PrimarySender, Storage}, -}; +use snarkos_node_bft::{Gateway, Primary, Worker, helpers::Storage, storage_service::BFTMemoryService}; +use snarkos_utilities::SimpleStoppable; -use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ console::account::Address, ledger::{ @@ -49,8 +46,7 @@ use locktick::parking_lot::RwLock; #[cfg(not(feature = "locktick"))] use parking_lot::RwLock; use rand::Rng; -use tokio::{sync::oneshot, task::JoinHandle, time::sleep}; -use tracing::*; +use tokio::{task::JoinHandle, time::sleep}; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -88,12 +84,7 @@ pub fn initialize_logger(verbosity: u8) { } /// Fires *fake* unconfirmed solutions at the node. -pub fn fire_unconfirmed_solutions( - sender: &PrimarySender, - node_id: u16, - interval_ms: u64, -) -> JoinHandle<()> { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +pub fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) -> JoinHandle<()> { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -119,13 +110,8 @@ pub fn fire_unconfirmed_solutions( // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng).await } else { sample(&mut unique_rng).await }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. 
counter += 1; // Sleep briefly. @@ -136,11 +122,10 @@ pub fn fire_unconfirmed_solutions( /// Fires *fake* unconfirmed transactions at the node. pub fn fire_unconfirmed_transactions( - sender: &PrimarySender, + primary: Primary, node_id: u16, interval_ms: u64, ) -> JoinHandle<()> { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -167,13 +152,8 @@ pub fn fire_unconfirmed_transactions( loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. - if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -201,7 +181,7 @@ pub fn sample_ledger( let gen_ledger = primary::genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } /// Samples a new storage with the given ledger. 
diff --git a/node/bft/tests/components/worker.rs b/node/bft/tests/components/worker.rs index be53686ae0..4408afbf99 100644 --- a/node/bft/tests/components/worker.rs +++ b/node/bft/tests/components/worker.rs @@ -19,10 +19,7 @@ use crate::common::{ utils::{sample_ledger, sample_worker}, }; use snarkos_node_bft::helpers::max_redundant_requests; -use snarkvm::{ - ledger::narwhal::TransmissionID, - prelude::{Network, TestRng}, -}; +use snarkvm::{console::network::Network, ledger::narwhal::TransmissionID, prelude::TestRng, utilities::task}; use std::net::SocketAddr; @@ -57,7 +54,7 @@ async fn test_resend_transmission_request() { // Send a request to fetch the dummy transmission. let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -76,7 +73,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -95,7 +92,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let peer_ip = peer_ips.pop().unwrap(); let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -141,7 +138,7 @@ async fn test_flood_transmission_requests() { // Send the maximum number of redundant requests to fetch the dummy transmission. 
for peer_ip in remaining_peer_ips.clone() { let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); } tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -160,7 +157,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -179,7 +176,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = remaining_peer_ips.pop().unwrap(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; diff --git a/node/bft/tests/gateway_e2e.rs b/node/bft/tests/gateway_e2e.rs index 2ac0b37509..0d0211e842 100644 --- a/node/bft/tests/gateway_e2e.rs +++ b/node/bft/tests/gateway_e2e.rs @@ -22,13 +22,14 @@ use crate::common::{ test_peer::TestPeer, utils::{sample_gateway, sample_ledger, sample_storage}, }; + use snarkos_account::Account; -use snarkos_node_bft::{Gateway, helpers::init_primary_channels}; +use snarkos_node_bft::{Gateway, gateway::test_helpers::DummyGatewayPrimaryCallback}; use snarkos_node_bft_events::{ChallengeRequest, ChallengeResponse, Disconnect, DisconnectReason, Event, WorkerPing}; use snarkos_node_tcp::P2P; use snarkvm::{ledger::narwhal::Data, prelude::TestRng}; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use deadline::deadline; use rand::Rng; @@ -43,9 +44,7 @@ async fn new_test_gateway( let 
gateway = sample_gateway(accounts[0].clone(), storage, ledger); // Set up primary channels, we discard the rx as we're testing the gateway sans BFT. - let (primary_tx, _primary_rx) = init_primary_channels(); - - gateway.run(primary_tx, [].into(), None).await; + gateway.run([].into(), Arc::new(DummyGatewayPrimaryCallback::default()), None).await.unwrap(); (accounts, gateway) } diff --git a/node/bft/tests/narwhal_e2e.rs b/node/bft/tests/narwhal_e2e.rs index 63c803767d..c202d40fa7 100644 --- a/node/bft/tests/narwhal_e2e.rs +++ b/node/bft/tests/narwhal_e2e.rs @@ -38,7 +38,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -62,7 +63,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -114,7 +116,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -144,7 +147,8 @@ async fn test_storage_coherence() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. 
diff --git a/node/cdn/Cargo.toml b/node/cdn/Cargo.toml index 050cb382c7..09dab6e5ae 100644 --- a/node/cdn/Cargo.toml +++ b/node/cdn/Cargo.toml @@ -45,6 +45,9 @@ workspace = true optional = true features = [ "metrics" ] +[dependencies.snarkos-utilities] +workspace = true + [dependencies.rayon] workspace = true optional = true diff --git a/node/cdn/src/blocks.rs b/node/cdn/src/blocks.rs index 4b489d195e..ab9a13db10 100644 --- a/node/cdn/src/blocks.rs +++ b/node/cdn/src/blocks.rs @@ -17,6 +17,8 @@ // https://github.com/rust-lang/rust-clippy/issues/6446 #![allow(clippy::await_holding_lock)] +use snarkos_utilities::Stoppable; + use snarkvm::prelude::{ Deserialize, DeserializeOwned, @@ -87,11 +89,11 @@ impl CdnBlockSync { pub fn new>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) -> Self { let task = { let base_url = base_url.clone(); - tokio::spawn(async move { Self::worker(base_url, ledger, shutdown).await }) + tokio::spawn(async move { Self::worker(base_url, ledger, stoppable).await }) }; debug!("Started sync from CDN at {base_url}"); @@ -119,13 +121,13 @@ impl CdnBlockSync { async fn worker>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) -> SyncResult { // Fetch the node height. let start_height = ledger.latest_height() + 1; // Load the blocks from the CDN into the ledger. let ledger_clone = ledger.clone(); - let result = load_blocks(&base_url, start_height, None, shutdown, move |block: Block| { + let result = load_blocks(&base_url, start_height, None, stoppable, move |block: Block| { ledger_clone.advance_to_next_block(&block) }) .await; @@ -172,7 +174,7 @@ pub async fn load_blocks( base_url: &http::Uri, start_height: u32, end_height: Option, - shutdown: Arc, + stoppable: Arc, process: impl FnMut(Block) -> Result<()> + Clone + Send + Sync + 'static, ) -> Result { // Create a Client to maintain a connection pool throughout the sync. 
@@ -225,16 +227,19 @@ pub async fn load_blocks( // Spawn a background task responsible for concurrent downloads. let pending_blocks_clone = pending_blocks.clone(); let base_url = base_url.to_owned(); - let shutdown_clone = shutdown.clone(); - tokio::spawn(async move { - download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, shutdown_clone).await; - }); + + { + let stoppable = stoppable.clone(); + tokio::spawn(async move { + download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, stoppable).await; + }); + } // A loop for inserting the pending blocks into the ledger. let mut current_height = start_height.saturating_sub(1); while current_height < end_height - 1 { // If we are instructed to shut down, abort. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { info!("Stopping block sync at {} - shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -269,12 +274,12 @@ pub async fn load_blocks( // Attempt to advance the ledger using the CDN block bundle. let mut process_clone = process.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); current_height = tokio::task::spawn_blocking(move || { threadpool.install(|| { for block in next_blocks.into_iter().filter(|b| (start_height..end_height).contains(&b.height())) { // If we are instructed to shut down, abort. - if shutdown_clone.load(Ordering::Relaxed) { + if stoppable_clone.is_stopped() { info!("Stopping block sync at {} - the node is shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -314,7 +319,7 @@ async fn download_block_bundles( cdn_start: u32, cdn_end: u32, pending_blocks: Arc>>>, - shutdown: Arc, + stoppable: Arc, ) { // Keep track of the number of concurrent requests. 
let active_requests: Arc = Default::default(); @@ -322,7 +327,7 @@ async fn download_block_bundles( let mut start = cdn_start; while start < cdn_end - 1 { // If we are instructed to shut down, stop downloading. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { break; } @@ -356,7 +361,7 @@ async fn download_block_bundles( let base_url_clone = base_url.clone(); let pending_blocks_clone = pending_blocks.clone(); let active_requests_clone = active_requests.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); tokio::spawn(async move { // Increment the number of active requests. active_requests_clone.fetch_add(1, Ordering::Relaxed); @@ -392,7 +397,7 @@ async fn download_block_bundles( attempts += 1; if attempts > MAXIMUM_REQUEST_ATTEMPTS { warn!("Maximum number of requests to {blocks_url} reached - shutting down..."); - shutdown_clone.store(true, Ordering::Relaxed); + stoppable_clone.stop(); break; } tokio::time::sleep(Duration::from_secs(attempts as u64 * 10)).await; @@ -553,8 +558,10 @@ fn log_progress( #[cfg(test)] mod tests { - use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, log_progress}; - use crate::load_blocks; + use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, load_blocks, log_progress}; + + use snarkos_utilities::SimpleStoppable; + use snarkvm::prelude::{MainnetV0, block::Block}; use http::Uri; @@ -576,7 +583,7 @@ mod tests { let rt = tokio::runtime::Runtime::new().unwrap(); rt.block_on(async { let completed_height = - load_blocks(&testnet_cdn_url, start, end, Default::default(), process).await.unwrap(); + load_blocks(&testnet_cdn_url, start, end, SimpleStoppable::new(), process).await.unwrap(); assert_eq!(blocks.read().len(), expected); if expected > 0 { assert_eq!(blocks.read().last().unwrap().height(), completed_height); diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index 33421460f1..96e69f8673 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml 
@@ -21,15 +21,15 @@ default = [ ] locktick = [ "dep:locktick", "snarkos-node-bft/locktick", - "snarkos-node-bft-ledger-service/locktick", - "snarkos-node-bft-storage-service/locktick", "snarkvm/locktick" ] metrics = [ "dep:snarkos-node-metrics" ] telemetry = [ "snarkos-node-bft/telemetry" ] -cuda = [ "snarkvm/cuda", "snarkos-account/cuda", "snarkos-node-bft-ledger-service/cuda" ] -serial = [ "snarkos-node-bft-ledger-service/serial" ] +cuda = [ "snarkvm/cuda", "snarkos-account/cuda" ] +serial = [ ] +[dependencies.async-trait] +workspace = true [dependencies.aleo-std] workspace = true @@ -64,20 +64,14 @@ workspace = true [dependencies.snarkos-node-bft] workspace = true - -[dependencies.snarkos-node-bft-ledger-service] -workspace = true -features = [ "ledger", "ledger-write" ] - -[dependencies.snarkos-node-bft-storage-service] -workspace = true -features = [ "persistent" ] +features = [ "persistent-storage" ] [dependencies.snarkos-node-sync] workspace = true [dependencies.snarkvm] workspace = true +features = [ "utilities", "async" ] [dependencies.tokio] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 80415491bf..4a03d5e902 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -27,20 +27,13 @@ extern crate snarkos_node_metrics as metrics; use snarkos_account::Account; use snarkos_node_bft::{ BFT, + BftCallback, MAX_BATCH_DELAY_IN_MS, Primary, - helpers::{ - ConsensusReceiver, - PrimarySender, - Storage as NarwhalStorage, - fmt_id, - init_consensus_channels, - init_primary_channels, - }, - spawn_blocking, + helpers::{Storage as NarwhalStorage, fmt_id}, + ledger_service::LedgerService, + storage_service::BFTPersistentStorage, }; -use snarkos_node_bft_ledger_service::LedgerService; -use snarkos_node_bft_storage_service::BFTPersistentStorage; use snarkos_node_sync::{BlockSync, Ping}; use snarkvm::{ @@ -50,10 +43,11 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::*, + utilities::task::{self, 
JoinHandle}, }; use aleo_std::StorageMode; -use anyhow::Result; +use anyhow::{Context, Result}; use colored::Colorize; use indexmap::IndexMap; #[cfg(feature = "locktick")] @@ -62,7 +56,6 @@ use lru::LruCache; #[cfg(not(feature = "locktick"))] use parking_lot::{Mutex, RwLock}; use std::{future::Future, net::SocketAddr, num::NonZeroUsize, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle}; #[cfg(feature = "metrics")] use std::collections::HashMap; @@ -84,7 +77,7 @@ const MAX_DEPLOYMENTS_PER_INTERVAL: usize = 1; /// /// Consensus acts as a rate limiter to prevents workers in BFT from being overloaded. /// Each worker maintains a ready queue (which is essentially also a mempool), but verifies transactions/solutions -/// before enquing them. +/// before enqueuing them. /// Consensus only passes more transactions/solutions to the BFT layer if its ready queues are not already full. #[derive(Clone)] pub struct Consensus { @@ -92,8 +85,6 @@ pub struct Consensus { ledger: Arc>, /// The BFT. bft: BFT, - /// The primary sender. - primary_sender: PrimarySender, /// The unconfirmed solutions queue. solutions_queue: Arc, Solution>>>, /// The unconfirmed transactions queue. @@ -125,21 +116,19 @@ impl Consensus { ping: Arc>, dev: Option, ) -> Result { - // Initialize the primary channels. - let (primary_sender, primary_receiver) = init_primary_channels::(); // Initialize the Narwhal transmissions. let transmissions = Arc::new(BFTPersistentStorage::open(storage_mode.clone())?); // Initialize the Narwhal storage. let storage = NarwhalStorage::new(ledger.clone(), transmissions, BatchHeader::::MAX_GC_ROUNDS as u64); // Initialize the BFT. let bft = - BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev)?; + BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev) + .await?; // Create a new instance of Consensus. 
let mut _self = Self { ledger, bft, block_sync, - primary_sender, solutions_queue: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(CAPACITY_FOR_SOLUTIONS).unwrap()))), transactions_queue: Default::default(), seen_solutions: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(1 << 16).unwrap()))), @@ -152,12 +141,9 @@ impl Consensus { info!("Starting the consensus instance..."); - // First, initialize the consensus channels. - let (consensus_sender, consensus_receiver) = init_consensus_channels(); - // Then, start the consensus handlers. - _self.start_handlers(consensus_receiver); + _self.start_handlers(); // Lastly, also start BFTs handlers. - _self.bft.run(Some(ping), Some(consensus_sender), _self.primary_sender.clone(), primary_receiver).await?; + _self.bft.run(Some(ping), Some(Arc::new(_self.clone()))).await?; Ok(_self) } @@ -337,7 +323,7 @@ impl Consensus { let solution_id = solution.id(); trace!("Adding unconfirmed solution '{}' to the memory pool...", fmt_id(solution_id)); // Send the unconfirmed solution to the primary. - if let Err(e) = self.primary_sender.send_unconfirmed_solution(solution_id, Data::Object(solution)).await { + if let Err(e) = self.bft.primary().process_unconfirmed_solution(solution_id, Data::Object(solution)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { // If error occurs after the first 10 blocks of the epoch, log it as a warning, otherwise ignore. @@ -437,7 +423,7 @@ impl Consensus { trace!("Adding unconfirmed {tx_type_str} transaction '{}' to the memory pool...", fmt_id(transaction_id)); // Send the unconfirmed transaction to the primary. if let Err(e) = - self.primary_sender.send_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await + self.bft.primary().process_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { @@ -456,17 +442,7 @@ impl Consensus { /// Starts the consensus handlers. 
/// /// This is only invoked once, in the constructor. - fn start_handlers(&self, consensus_receiver: ConsensusReceiver) { - let ConsensusReceiver { mut rx_consensus_subdag } = consensus_receiver; - - // Process the committed subdag and transmissions from the BFT. - let self_ = self.clone(); - self.spawn(async move { - while let Some((committed_subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - self_.process_bft_subdag(committed_subdag, transmissions, callback).await; - } - }); - + fn start_handlers(&self) { // Process the unconfirmed transactions in the memory pool. // // TODO (kaimast): This shouldn't happen periodically but only when new batches/blocks are accepted @@ -487,36 +463,39 @@ impl Consensus { } }); } +} +#[async_trait::async_trait] +impl BftCallback for Consensus { /// Attempts to build a new block from the given subDAG, and (tries to) advance the legder to it. async fn process_bft_subdag( &self, subdag: Subdag, transmissions: IndexMap, Transmission>, - callback: oneshot::Sender>, - ) { + ) -> Result<()> { // Try to advance to the next block. let self_ = self.clone(); let transmissions_ = transmissions.clone(); - let result = spawn_blocking! { self_.try_advance_to_next_block(subdag, transmissions_) }; + let result = task::spawn_blocking(move || self_.try_advance_to_next_block(subdag, transmissions_)).await; // If the block failed to advance, reinsert the transmissions into the memory pool. - if let Err(e) = &result { - error!("Unable to advance to the next block - {e}"); - // On failure, reinsert the transmissions into the memory pool. + if result.is_err() { self.reinsert_transmissions(transmissions).await; } - // Send the callback **after** advancing to the next block. - // Note: We must await the block to be advanced before sending the callback. - callback.send(result).ok(); + + result } +} +impl Consensus { /// Attempts to advance the ledger to the next block, and updates the metrics (if enabled) accordingly. 
fn try_advance_to_next_block( &self, subdag: Subdag, transmissions: IndexMap, Transmission>, ) -> Result<()> { + trace!("Trying to advance to new subdag anchored at round {}", subdag.anchor_round()); + #[cfg(feature = "metrics")] let start = subdag.leader_certificate().batch_header().timestamp(); #[cfg(feature = "metrics")] @@ -525,14 +504,20 @@ impl Consensus { let current_block_timestamp = self.ledger.latest_block().header().metadata().timestamp(); // Create the candidate next block. - let next_block = self.ledger.prepare_advance_to_next_quorum_block(subdag, transmissions)?; + let next_block = self + .ledger + .prepare_advance_to_next_quorum_block(subdag, transmissions) + .with_context(|| "Ledger preparation for advancement to next block failed")?; // Check that the block is well-formed. - self.ledger.check_next_block(&next_block)?; + self.ledger.check_next_block(&next_block).with_context(|| "Check for new block failed")?; // Advance to the next block. - self.ledger.advance_to_next_block(&next_block)?; + self.ledger.advance_to_next_block(&next_block).with_context(|| "Ledger advancement to new block failed")?; + + // Note: Do not return failure after this point, as the ledger already advanced. + #[cfg(feature = "telemetry")] // Fetch the latest committee - let latest_committee = self.ledger.current_committee()?; + let latest_committee = self.ledger.current_committee(); // If the next block starts a new epoch, clear the existing solutions. if next_block.height() % N::NUM_BLOCKS_PER_EPOCH == 0 { @@ -543,8 +528,10 @@ impl Consensus { } // Notify peers that we have a new block. - let locators = self.block_sync.get_block_locators()?; - self.ping.update_block_locators(locators); + match self.block_sync.get_block_locators() { + Ok(locators) => self.ping.update_block_locators(locators), + Err(err) => warn!("Failed to generate new block locators after block advancement: {err:?}"), + } // Make block sync aware of the new block. 
self.block_sync.set_sync_height(next_block.height()); @@ -571,7 +558,7 @@ impl Consensus { metrics::gauge(metrics::blocks::CUMULATIVE_PROOF_TARGET, cumulative_proof_target as f64); #[cfg(feature = "telemetry")] - { + if let Ok(latest_committee) = latest_committee { // Retrieve the latest participation scores. let participation_scores = self.bft().primary().gateway().validator_telemetry().get_participation_scores(&latest_committee); @@ -611,28 +598,24 @@ impl Consensus { transmission_id: TransmissionID, transmission: Transmission, ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the transmission to the primary. match (transmission_id, transmission) { - (TransmissionID::Ratification, Transmission::Ratification) => return Ok(()), + (TransmissionID::Ratification, Transmission::Ratification) => Ok(()), (TransmissionID::Solution(solution_id, _), Transmission::Solution(solution)) => { // Send the solution to the primary. - self.primary_sender.tx_unconfirmed_solution.send((solution_id, solution, callback)).await?; + self.bft.primary().process_unconfirmed_solution(solution_id, solution).await } (TransmissionID::Transaction(transaction_id, _), Transmission::Transaction(transaction)) => { // Send the transaction to the primary. - self.primary_sender.tx_unconfirmed_transaction.send((transaction_id, transaction, callback)).await?; + self.bft.primary().process_unconfirmed_transaction(transaction_id, transaction).await } _ => bail!("Mismatching `(transmission_id, transmission)` pair in consensus"), } - // Await the callback. - callback_receiver.await? } /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the consensus and BFT layers. 
diff --git a/node/router/Cargo.toml b/node/router/Cargo.toml index 0a13d9711c..5b6135464c 100644 --- a/node/router/Cargo.toml +++ b/node/router/Cargo.toml @@ -134,6 +134,9 @@ features = [ "test" ] workspace = true features = [ "test-helpers" ] +[dev-dependencies.tracing-test] +workspace = true + [dev-dependencies.tracing-subscriber] workspace = true features = [ "env-filter", "fmt" ] diff --git a/node/router/src/handshake.rs b/node/router/src/handshake.rs index 4f7c94d736..9b7804e445 100644 --- a/node/router/src/handshake.rs +++ b/node/router/src/handshake.rs @@ -131,7 +131,9 @@ impl Router { if let Some(addr) = listener_addr { if let Ok(ref challenge_request) = handshake_result { if let Some(peer) = self.peer_pool.write().get_mut(&addr) { - peer.upgrade_to_connected(peer_addr, challenge_request, self.clone()); + if let Err(err) = peer.upgrade_to_connected(peer_addr, challenge_request, self.clone()) { + warn!("Failed to upgrade peer to `connected`: {err}"); + } } #[cfg(feature = "metrics")] self.update_metrics(); diff --git a/node/router/src/helpers/peer.rs b/node/router/src/helpers/peer.rs index a05c1d3173..e90f8a0afe 100644 --- a/node/router/src/helpers/peer.rs +++ b/node/router/src/helpers/peer.rs @@ -16,6 +16,7 @@ use crate::{NodeType, Router, messages::ChallengeRequest}; use snarkvm::prelude::{Address, Network}; +use anyhow::{Result, ensure}; use std::{net::SocketAddr, time::Instant}; /// A peer of any connection status. @@ -84,9 +85,14 @@ impl Peer { } /// Promote a connecting peer to a fully connected one. - pub fn upgrade_to_connected(&mut self, connected_addr: SocketAddr, cr: &ChallengeRequest, router: Router) { + pub fn upgrade_to_connected( + &mut self, + connected_addr: SocketAddr, + cr: &ChallengeRequest, + router: Router, + ) -> Result<()> { // Logic check: this can only happen during the handshake. 
- assert!(matches!(self, Self::Connecting(_))); + ensure!(matches!(self, Self::Connecting(_)), "Peer is not in `connecting` state"); let timestamp = Instant::now(); let listener_addr = SocketAddr::from((connected_addr.ip(), cr.listener_port)); @@ -106,6 +112,8 @@ impl Peer { last_seen: timestamp, router, }); + + Ok(()) } /// Demote a peer to candidate status, marking it as disconnected. diff --git a/node/router/tests/heartbeat.rs b/node/router/tests/heartbeat.rs index c181c7c1c2..0e25c28f7c 100644 --- a/node/router/tests/heartbeat.rs +++ b/node/router/tests/heartbeat.rs @@ -91,6 +91,7 @@ async fn connect_to(router: &TestRouter, other: &TestRouter) { /// Checks that clients are ordered before nodes and that ordering is based on when a peer was last seen. #[tokio::test] +#[tracing_test::traced_test] async fn peer_priority_ordering() { let router = client(0, 10).await; router.enable_listener().await; diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index 4eb03f5e94..43beafed71 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -15,11 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}, + cdn::CdnBlockSync, + traits::NodeInterface, +}; use snarkos_account::Account; -use snarkos_node_bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}; -use snarkos_node_cdn::CdnBlockSync; use snarkos_node_rest::Rest; use snarkos_node_router::{ Heartbeat, @@ -34,6 +36,8 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ console::network::Network, ledger::{ @@ -43,6 +47,7 @@ use snarkvm::{ store::ConsensusStorage, }, prelude::{VM, block::Transaction}, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -60,17 +65,13 @@ use std::{ sync::{ Arc, atomic::{ - AtomicBool, AtomicUsize, Ordering::{Acquire, Relaxed}, }, 
}, time::{Duration, Instant}, }; -use tokio::{ - task::JoinHandle, - time::{sleep, timeout}, -}; +use tokio::time::{sleep, timeout}; /// The maximum number of solutions to verify in parallel. /// Note: worst case memory to verify a solution is 0.5 GiB. @@ -121,10 +122,10 @@ pub struct Client> { num_verifying_executions: Arc, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, + /// The signal handling logic. + signal_handler: Arc, } impl> Client { @@ -140,16 +141,13 @@ impl> Client { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::::load(genesis.clone(), storage_mode.clone())?; // Initialize the ledger service. - let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), signal_handler.clone())); // Determine if the client should allow external peers. let allow_external_peers = true; @@ -191,13 +189,13 @@ impl> Client { num_verifying_deploys: Default::default(), num_verifying_executions: Default::default(), handles: Default::default(), - shutdown: shutdown.clone(), + signal_handler: signal_handler.clone(), }; // Perform sync with CDN (if enabled). let cdn_sync = cdn.map(|base_url| { trace!("CDN sync is enabled"); - Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown)) + Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler)) }); // Initialize the REST server. @@ -227,10 +225,7 @@ impl> Client { node.initialize_deploy_verification(); // Initialize execution verification. node.initialize_execute_verification(); - // Initialize the notification message loop. 
- node.handles.lock().push(crate::start_notification_message_loop()); - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); + // Return the node. Ok(node) } @@ -261,15 +256,15 @@ impl> Client { let _self = self.clone(); let mut last_update = Instant::now(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if _self.shutdown.load(std::sync::atomic::Ordering::Acquire) { - info!("Shutting down block production"); + if _self.signal_handler.is_stopped() { + info!("Shutting down sync task"); break; } - // Make sure we do not sync too often + // Make sure we do not sync too often. let now = Instant::now(); let elapsed = now.saturating_duration_since(last_update); let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); @@ -282,12 +277,12 @@ impl> Client { _self.try_block_sync().await; last_update = now; } - })); + }); } - /// Client-side version of `snarkvm_node_bft::Sync::try_block_sync()`. + /// Client-side version of `snarkos_node_bft::Sync::try_block_sync()`. async fn try_block_sync(&self) { - // Sleep briefly to avoid triggering spam detection. + // Wait for updates or a timeout. let _ = timeout(Self::SYNC_INTERVAL, self.sync.wait_for_update()).await; // For sanity, check that sync height is never below ledger height. @@ -360,7 +355,7 @@ impl> Client { } // Sleep to avoid triggering spam detection. - tokio::time::sleep(BLOCK_REQUEST_BATCH_DELAY).await; + sleep(BLOCK_REQUEST_BATCH_DELAY).await; } } @@ -368,10 +363,10 @@ impl> Client { fn initialize_solution_verification(&self) { // Start the solution verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. 
- if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down solution verification"); break; } @@ -394,7 +389,7 @@ impl> Client { let previous_counter = node.num_verifying_solutions.fetch_add(1, Relaxed); let _node = node.clone(); // For each solution, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Retrieve the latest epoch hash. if let Ok(epoch_hash) = _node.ledger.latest_epoch_hash() { // Check if the prover has reached their solution limit. @@ -435,17 +430,17 @@ impl> Client { } } } - })); + }); } /// Initializes deploy verification. fn initialize_deploy_verification(&self) { // Start the deploy verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down deployment verification"); break; } @@ -468,7 +463,7 @@ impl> Client { let previous_counter = node.num_verifying_deploys.fetch_add(1, Relaxed); let _node = node.clone(); // For each deployment, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Check the deployment. match _node.ledger.check_transaction_basic(&transaction, None, &mut rand::thread_rng()) { Ok(_) => { @@ -488,17 +483,17 @@ impl> Client { } } } - })); + }); } /// Initializes execute verification. fn initialize_execute_verification(&self) { // Start the execute verification loop. let node = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. 
- if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down execution verification"); break; } @@ -521,7 +516,7 @@ impl> Client { let previous_counter = node.num_verifying_executions.fetch_add(1, Relaxed); let _node = node.clone(); // For each execution, spawn a task to verify it. - tokio::task::spawn_blocking(move || { + task::spawn_blocking(move || { // Check the execution. match _node.ledger.check_transaction_basic(&transaction, None, &mut rand::thread_rng()) { Ok(_) => { @@ -541,12 +536,12 @@ impl> Client { } } } - })); + }); } /// Spawns a task with the given future; it should only be used for long-running tasks. pub fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } } @@ -558,7 +553,6 @@ impl> NodeInterface for Client { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the client..."); diff --git a/node/src/lib.rs b/node/src/lib.rs index 3ac2106561..a162a12218 100644 --- a/node/src/lib.rs +++ b/node/src/lib.rs @@ -63,74 +63,3 @@ pub fn log_clean_error(storage_mode: &StorageMode) { } } } - -/// Starts the notification message loop. -pub fn start_notification_message_loop() -> tokio::task::JoinHandle<()> { - // let mut interval = tokio::time::interval(std::time::Duration::from_secs(180)); - tokio::spawn(async move { - // loop { - // interval.tick().await; - // // TODO (howardwu): Swap this with the official message for announcements. - // // info!("{}", notification_message()); - // } - }) -} - -/// Returns the notification message as a string. 
-pub fn notification_message() -> String { - use colored::Colorize; - - let mut output = String::new(); - output += &r#" - - ================================================================================================== - - 🚧 Welcome to Aleo - Calibration Period 🚧 - - ================================================================================================== - - During the calibration period, the network will be running in limited capacity. - - This calibration period is to ensure validators are stable and ready for mainnet launch. - During this period, the objective is to assess, adjust, and align validators' performance, - stability, and interoperability under varying network conditions. - - Please expect several network resets. With each network reset, software updates will - be performed to address potential bottlenecks, vulnerabilities, and/or inefficiencies, which - will ensure optimal performance for the ecosystem of validators, provers, and developers. - - ================================================================================================== - - Duration: - - Start Date: September 27, 2023 - - End Date: October 18, 2023 (subject to change) - - Participation: - - Node operators are NOT REQUIRED to participate during this calibration period. - - Network Resets: - - IMPORTANT: EXPECT MULTIPLE NETWORK RESETS. - - If participating, BE PREPARED TO RESET YOUR NODE AT ANY TIME. - - When a reset occurs, RUN THE FOLLOWING TO RESET YOUR NODE: - - git checkout mainnet && git pull - - cargo install --locked --path . - - snarkos clean - - snarkos start --nodisplay --client - - Communication: - - Stay ONLINE and MONITOR our Discord and Twitter for community updates. - - Purpose: - - This period is STRICTLY FOR NETWORK CALIBRATION. - - This period is NOT INTENDED for general-purpose usage by developers and provers. - - Incentives: - - There are NO INCENTIVES during this calibration period. 
- - ================================================================================================== -"# - .white() - .bold(); - - output -} diff --git a/node/src/node.rs b/node/src/node.rs index 591d5a0846..5a0540d6ce 100644 --- a/node/src/node.rs +++ b/node/src/node.rs @@ -13,9 +13,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::{Client, Prover, Validator, traits::NodeInterface}; +use crate::{ + Client, + Prover, + Validator, + router::{Outbound, Router, messages::NodeType}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_router::{Outbound, Router, messages::NodeType}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{ Address, Ledger, @@ -28,10 +36,7 @@ use snarkvm::prelude::{ use aleo_std::StorageMode; use anyhow::Result; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, -}; +use std::{net::SocketAddr, sync::Arc}; #[derive(Clone)] pub enum Node { @@ -59,7 +64,7 @@ impl Node { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Validator(Arc::new( Validator::new( @@ -76,7 +81,7 @@ impl Node { allow_external_peers, dev_txs, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -90,10 +95,10 @@ impl Node { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Prover(Arc::new( - Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, shutdown).await?, + Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, signal_handler).await?, ))) } @@ -109,7 +114,7 @@ impl Node { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Client(Arc::new( Client::new( @@ -123,7 +128,7 @@ impl Node { storage_mode, rotate_external_peers, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -229,4 +234,13 @@ 
impl Node { Self::Client(node) => node.shut_down().await, } } + + /// Waits until the node receives a signal. + pub async fn wait_for_signals(&self, signal_handler: &SignalHandler) { + match self { + Self::Validator(node) => node.wait_for_signals(signal_handler).await, + Self::Prover(node) => node.wait_for_signals(signal_handler).await, + Self::Client(node) => node.wait_for_signals(signal_handler).await, + } + } } diff --git a/node/src/prover/mod.rs b/node/src/prover/mod.rs index 44fe46e827..17f93d2bac 100644 --- a/node/src/prover/mod.rs +++ b/node/src/prover/mod.rs @@ -15,9 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::ledger_service::ProverLedgerService, + sync::{BlockSync, Ping}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_bft::ledger_service::ProverLedgerService; use snarkos_node_router::{ Heartbeat, Inbound, @@ -26,11 +30,12 @@ use snarkos_node_router::{ Routing, messages::{Message, NodeType, UnconfirmedSolution}, }; -use snarkos_node_sync::{BlockSync, Ping}; use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ ledger::narwhal::Data, prelude::{ @@ -40,6 +45,7 @@ use snarkvm::{ store::ConsensusStorage, }, synthesizer::VM, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -56,10 +62,9 @@ use std::{ net::SocketAddr, sync::{ Arc, - atomic::{AtomicBool, AtomicU8, Ordering}, + atomic::{AtomicU8, Ordering}, }, }; -use tokio::task::JoinHandle; /// A prover is a light node, capable of producing proofs for consensus. #[derive(Clone)] @@ -82,8 +87,8 @@ pub struct Prover> { max_puzzle_instances: u8, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, + /// The signal handling logic. + signal_handler: Arc, /// Keeps track of sending pings. ping: Arc>, /// PhantomData. 
@@ -99,11 +104,8 @@ impl> Prover { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger service. let ledger_service = Arc::new(ProverLedgerService::new()); // Determine if the prover should allow external peers. @@ -146,17 +148,13 @@ impl> Prover { max_puzzle_instances: u8::try_from(max_puzzle_instances)?, handles: Default::default(), ping, - shutdown, + signal_handler, _phantom: Default::default(), }; // Initialize the routing. node.initialize_routing().await; // Initialize the puzzle. node.initialize_puzzle().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. Ok(node) } @@ -174,7 +172,6 @@ impl> NodeInterface for Prover { // Shut down the puzzle. debug!("Shutting down the puzzle..."); - self.shutdown.store(true, Ordering::Release); // Abort the tasks. debug!("Shutting down the prover..."); @@ -192,7 +189,7 @@ impl> Prover { async fn initialize_puzzle(&self) { for _ in 0..self.max_puzzle_instances { let prover = self.clone(); - self.handles.lock().push(tokio::spawn(async move { + self.handles.lock().push(task::spawn(async move { prover.puzzle_loop().await; })); } @@ -228,13 +225,13 @@ impl> Prover { if let (Some(epoch_hash), Some((coinbase_target, proof_target))) = (latest_epoch_hash, latest_state) { // Execute the puzzle. let prover = self.clone(); - let result = tokio::task::spawn_blocking(move || { + let result = task::spawn_blocking(move || { prover.puzzle_iteration(epoch_hash, coinbase_target, proof_target, &mut OsRng) }) .await; // If the prover found a solution, then broadcast it. 
- if let Ok(Some((solution_target, solution))) = result { + if let Some((solution_target, solution)) = result { info!("Found a Solution '{}' (Proof Target {solution_target})", solution.id()); // Broadcast the solution. self.broadcast_solution(solution); @@ -245,7 +242,7 @@ impl> Prover { } // If the Ctrl-C handler registered the signal, stop the prover. - if self.shutdown.load(Ordering::Acquire) { + if self.signal_handler.is_stopped() { debug!("Shutting down the puzzle..."); break; } diff --git a/node/src/traits.rs b/node/src/traits.rs index 0c031c0ec9..481d700bda 100644 --- a/node/src/traits.rs +++ b/node/src/traits.rs @@ -13,19 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use snarkos_node_router::{Routing, messages::NodeType}; +use crate::router::{Routing, messages::NodeType}; + +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{Address, Network, PrivateKey, ViewKey}; -use once_cell::sync::OnceCell; -use std::{ - future::Future, - io, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, - time::Duration, -}; +use std::time::Duration; #[async_trait] pub trait NodeInterface: Routing { @@ -56,65 +50,20 @@ pub trait NodeInterface: Routing { /// Handles OS signals for the node to intercept and perform a clean shutdown. /// The optional `shutdown_flag` flag can be used to cleanly terminate the syncing process. - fn handle_signals(shutdown_flag: Arc) -> Arc> { - // In order for the signal handler to be started as early as possible, a reference to the node needs - // to be passed to it at a later time. - let node: Arc> = Default::default(); - - #[cfg(target_family = "unix")] - fn signal_listener() -> impl Future> { - use tokio::signal::unix::{SignalKind, signal}; - - // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP. 
- let mut s_int = signal(SignalKind::interrupt()).unwrap(); - let mut s_term = signal(SignalKind::terminate()).unwrap(); - let mut s_quit = signal(SignalKind::quit()).unwrap(); - let mut s_hup = signal(SignalKind::hangup()).unwrap(); - - // Return when any of the signals above is received. - async move { - tokio::select!( - _ = s_int.recv() => (), - _ = s_term.recv() => (), - _ = s_quit.recv() => (), - _ = s_hup.recv() => (), - ); - Ok(()) - } - } - #[cfg(not(target_family = "unix"))] - fn signal_listener() -> impl Future> { - tokio::signal::ctrl_c() - } - - let node_clone = node.clone(); - tokio::task::spawn(async move { - match signal_listener().await { - Ok(()) => { - warn!("=========================================================================================="); - warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); - warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); - warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); - warn!("=========================================================================================="); - - match node_clone.get() { - // If the node is already initialized, then shut it down. - Some(node) => node.shut_down().await, - // Otherwise, if the node is not yet initialized, then set the shutdown flag directly. - None => shutdown_flag.store(true, Ordering::Relaxed), - } - - // A best-effort attempt to let any ongoing activity conclude. - tokio::time::sleep(Duration::from_secs(3)).await; - - // Terminate the process. 
- std::process::exit(0); - } - Err(error) => error!("tokio::signal::ctrl_c encountered an error: {}", error), - } - }); - - node + async fn wait_for_signals(&self, handler: &SignalHandler) { + handler.wait_for_signals().await; + + warn!("=========================================================================================="); + warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); + warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); + warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); + warn!("=========================================================================================="); + + // If the node is already initialized, then shut it down. + self.shut_down().await; + + // A best-effort attempt to let any ongoing activity conclude. + tokio::time::sleep(Duration::from_secs(3)).await; } /// Shuts down the node. diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 9bc6b98dcb..155928ad6c 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -18,7 +18,7 @@ mod router; use crate::traits::NodeInterface; use snarkos_account::Account; -use snarkos_node_bft::{ledger_service::CoreLedgerService, spawn_blocking}; +use snarkos_node_bft::ledger_service::CoreLedgerService; use snarkos_node_cdn::CdnBlockSync; use snarkos_node_consensus::Consensus; use snarkos_node_rest::Rest; @@ -35,12 +35,17 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; -use snarkvm::prelude::{ - Ledger, - Network, - block::{Block, Header}, - puzzle::Solution, - store::ConsensusStorage, +use snarkos_utilities::SignalHandler; + +use snarkvm::{ + prelude::{ + Ledger, + Network, + block::{Block, Header}, + puzzle::Solution, + store::ConsensusStorage, + }, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -50,12 +55,7 @@ use core::future::Future; use locktick::parking_lot::Mutex; 
#[cfg(not(feature = "locktick"))] use parking_lot::Mutex; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, - time::Duration, -}; -use tokio::task::JoinHandle; +use std::{net::SocketAddr, sync::Arc, time::Duration}; /// A validator is a full node, capable of validating blocks. #[derive(Clone)] @@ -72,8 +72,6 @@ pub struct Validator> { sync: Arc>, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, } @@ -94,16 +92,13 @@ impl> Validator { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::load(genesis, storage_mode.clone())?; // Initialize the ledger service. - let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), signal_handler.clone())); // Determine if the validator should rotate external peers. let rotate_external_peers = false; @@ -150,11 +145,10 @@ impl> Validator { sync: sync.clone(), ping, handles: Default::default(), - shutdown: shutdown.clone(), }; // Perform sync with CDN (if enabled). - let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown))); + let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler))); // Initialize the transaction pool. node.initialize_transaction_pool(dev, dev_txs)?; @@ -186,10 +180,6 @@ impl> Validator { // Initialize the routing. node.initialize_routing().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. 
Ok(node) } @@ -416,15 +406,19 @@ impl> Validator { let inputs = [Value::from(Literal::Address(self_.address())), Value::from(Literal::U64(U64::new(1)))]; // Execute the transaction. let self__ = self_.clone(); - let transaction = match spawn_blocking!(self__.ledger.vm().execute( - self__.private_key(), - locator, - inputs.into_iter(), - None, - 10_000, - None, - &mut rand::thread_rng(), - )) { + let transaction = match task::spawn_blocking(move || { + self__.ledger.vm().execute( + self__.private_key(), + locator, + inputs.into_iter(), + None, + 10_000, + None, + &mut rand::thread_rng(), + ) + }) + .await + { Ok(transaction) => transaction, Err(error) => { error!("Transaction pool encountered an execution error - {error}"); @@ -449,7 +443,7 @@ impl> Validator { /// Spawns a task with the given future; it should only be used for long-running tasks. pub fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } } @@ -461,7 +455,6 @@ impl> NodeInterface for Validator { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the validator..."); @@ -531,7 +524,7 @@ mod tests { false, dev_txs, None, - Default::default(), + SignalHandler::new(), ) .await .unwrap(); diff --git a/node/sync/Cargo.toml b/node/sync/Cargo.toml index a759cc33db..a16fb2afbd 100644 --- a/node/sync/Cargo.toml +++ b/node/sync/Cargo.toml @@ -43,7 +43,7 @@ workspace = true [dependencies.locktick] workspace = true -features = [ "parking_lot" ] +features = [ "parking_lot", "tokio" ] optional = true [dependencies.parking_lot] diff --git a/node/sync/src/block_sync.rs b/node/sync/src/block_sync.rs index c372a758d9..5016451b05 100644 --- a/node/sync/src/block_sync.rs +++ b/node/sync/src/block_sync.rs @@ -14,14 +14,13 @@ // limitations under the License. 
use crate::{ + communication_service::CommunicationService, helpers::{PeerPair, PrepareSyncRequest, SyncRequest}, - locators::BlockLocators, + locators::{BlockLocators, CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}, }; use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_router::messages::DataBlocks; -use snarkos_node_sync_communication_service::CommunicationService; -use snarkos_node_sync_locators::{CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS}; -use snarkvm::prelude::{Network, block::Block}; +use snarkvm::{console::network::Network, ledger::Block, utilities::LoggableError}; use anyhow::{Result, bail, ensure}; use indexmap::{IndexMap, IndexSet}; @@ -360,8 +359,8 @@ impl BlockSync { // Insert the chunk of block requests. for (height, (hash, previous_hash, _)) in requests.iter() { // Insert the block request into the sync pool using the sync IPs from the last block request in the chunk. - if let Err(error) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { - warn!("Block sync failed - {error}"); + if let Err(err) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { + err.log_error("Block sync failed"); return false; } } @@ -380,7 +379,7 @@ impl BlockSync { match sender { Some(sender) => { if let Err(err) = sender.await { - warn!("Failed to send block request to peer '{sync_ip}': {err}"); + err.log_warning(format!("Failed to send block request to peer '{sync_ip}'")); false } else { true @@ -401,7 +400,7 @@ impl BlockSync { let success = match result { Ok(success) => success, Err(err) => { - error!("tokio join error: {err}"); + err.log_error("tokio join error"); false } }; @@ -432,7 +431,7 @@ impl BlockSync { for block in blocks { if let Err(error) = self.insert_block_response(peer_ip, block) { self.remove_block_requests_to_peer(&peer_ip); - bail!("{error}"); + return Err(error); } } Ok(()) @@ -509,20 +508,20 @@ impl BlockSync { Ok(_) => match ledger.advance_to_next_block(&block) { Ok(_) => true, Err(err) 
=> { - warn!( - "Failed to advance to next block (height: {}, hash: '{}'): {err}", + err.log_warning(format!( + "Failed to advance to next block (height: {}, hash: '{}')", block.height(), block.hash() - ); + )); false } }, Err(err) => { - warn!( - "The next block (height: {}, hash: '{}') is invalid - {err}", + err.log_warning(format!( + "The next block (height: {}, hash: '{}') is invalid", block.height(), block.hash() - ); + )); false } } @@ -1283,14 +1282,16 @@ fn construct_request( #[cfg(test)] mod tests { use super::*; - use crate::locators::{ - CHECKPOINT_INTERVAL, - NUM_RECENT_BLOCKS, - test_helpers::{sample_block_locators, sample_block_locators_with_fork}, + use crate::{ + communication_service::test_helpers::DummyCommunicationService, + locators::{ + CHECKPOINT_INTERVAL, + NUM_RECENT_BLOCKS, + test_helpers::{sample_block_locators, sample_block_locators_with_fork}, + }, }; use snarkos_node_bft_ledger_service::MockLedgerService; - use snarkos_node_sync_communication_service::test_helpers::DummyCommunicationService; use snarkvm::{ ledger::committee::Committee, prelude::{Field, TestRng}, diff --git a/node/tests/common/node.rs b/node/tests/common/node.rs index e161a703e7..b83d22ca1c 100644 --- a/node/tests/common/node.rs +++ b/node/tests/common/node.rs @@ -14,17 +14,26 @@ // limitations under the License. use crate::common::test_peer::sample_genesis_block; + use snarkos_account::Account; use snarkos_node::{Client, Prover, Validator}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{MainnetV0 as CurrentNetwork, store::helpers::memory::ConsensusMemory}; use aleo_std::StorageMode; -use std::str::FromStr; +use std::{ + net::{IpAddr, Ipv4Addr, SocketAddr}, + str::FromStr, +}; + +/// Bind to a random port to avoid conflicts during testing. 
+const ANY_ADDR: SocketAddr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0); pub async fn client() -> Client> { Client::new( - "127.0.0.1:0".parse().unwrap(), - None, + ANY_ADDR, + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], @@ -33,7 +42,7 @@ pub async fn client() -> Client> StorageMode::new_test(None), false, // No extra peer rotation. None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create client instance") @@ -41,13 +50,13 @@ pub async fn client() -> Client> pub async fn prover() -> Prover> { Prover::new( - "127.0.0.1:0".parse().unwrap(), + ANY_ADDR, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], sample_genesis_block(), StorageMode::new_test(None), None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create prover instance") @@ -55,9 +64,9 @@ pub async fn prover() -> Prover> pub async fn validator() -> Validator> { Validator::new( - "127.0.0.1:0".parse().unwrap(), - None, - None, + ANY_ADDR, + Some(ANY_ADDR), + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], @@ -68,7 +77,7 @@ pub async fn validator() -> Validator { #[tokio::test] + #[tracing_test::traced_test] $(#[$attr])? 
async fn $peer_type() { use deadline::deadline; @@ -144,6 +145,7 @@ mod validator { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn duplicate_disconnect_attempts() { // common::initialise_logger(3); diff --git a/node/tests/handshake.rs b/node/tests/handshake.rs index b3b8455f7e..f2049a5d30 100644 --- a/node/tests/handshake.rs +++ b/node/tests/handshake.rs @@ -183,6 +183,7 @@ mod validator { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn simultaneous_connection_attempt() { // common::initialise_logger(3); @@ -242,6 +243,7 @@ async fn simultaneous_connection_attempt() { } #[tokio::test(flavor = "multi_thread")] +#[tracing_test::traced_test] async fn duplicate_connection_attempts() { // common::initialise_logger(3); diff --git a/node/tests/peering.rs b/node/tests/peering.rs index e2ffd20132..5c40605bd0 100644 --- a/node/tests/peering.rs +++ b/node/tests/peering.rs @@ -30,6 +30,7 @@ macro_rules! test_reject_unsolicited_peer_response { $( paste! { #[tokio::test] + #[tracing_test::traced_test] async fn [<$node_type _rejects_unsolicited_peer_response>]() { // Spin up a full node. let node = $crate::common::node::$node_type().await; diff --git a/snarkos/main.rs b/snarkos/main.rs index f0558f35bc..77d8c63306 100644 --- a/snarkos/main.rs +++ b/snarkos/main.rs @@ -14,14 +14,17 @@ // limitations under the License. use snarkos_cli::{commands::CLI, helpers::Updater}; -use snarkvm::utilities::display_error; +use snarkvm::utilities::{ + display_error, + errors::{catch_unwind, set_panic_hook}, +}; use clap::Parser; #[cfg(feature = "locktick")] use locktick::lock_snapshots; +use std::env; #[cfg(feature = "locktick")] use std::time::Instant; -use std::{backtrace::Backtrace, env, panic::catch_unwind}; use tracing::log::logger; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] @@ -92,36 +95,10 @@ fn main() { } }); - // Set a custom hook here to show "pretty" errors when panicking. 
- std::panic::set_hook(Box::new(|err| { - print_error!("⚠️ {}\n", err.to_string().replace("panicked at", "snarkOS encountered an unexpected error at")); - - // Always show backtraces. - let backtrace = Backtrace::force_capture().to_string(); - - let mut msg = "Backtrace:\n".to_string(); - msg.push_str(" [...]\n"); - - // Remove all the low level frames. - // This can be done more cleanly once the `backtrace_frames` feature is stabilized. - let lines = backtrace.lines().skip_while(|line| !line.contains("core::panicking")); - - for line in lines { - // Stop printing once we hit the panic handler. - if line.contains("snarkos::main") { - break; - } - - msg.push_str(&format!("{line}\n")); - } - - // Print the entire backtrace as a single log message. - print_error!("{msg}"); - })); - // Run the CLI. // We use `catch_unwind` here to ensure a panic stops execution and not just a single thread. // Note: `catch_unwind` can be nested without problems. + set_panic_hook(); let result = catch_unwind(|| { // Parse the given arguments. let cli = CLI::parse(); @@ -151,7 +128,30 @@ fn main() { exit(1); } - Err(_) => { + Err((msg, backtrace)) => { + print_error!("⚠️ {}\n", msg.replace("panicked at", "snarkOS encountered an unexpected error at")); + + // Always show backtraces. + let mut msg = "Backtrace:\n".to_string(); + msg.push_str(" [...]\n"); + + // Remove all the low level frames. + // This can be done more cleanly once the `backtrace_frames` feature is stabilized. + let backtrace = backtrace.to_string(); + let lines = backtrace.lines().skip_while(|line| !line.contains("core::panicking")); + + for line in lines { + // Stop printing once we hit the panic handler. + if line.contains("snarkos::main") { + break; + } + + msg.push_str(&format!("{line}\n")); + } + + // Print the entire backtrace as a single log message. + print_error!("{msg}"); + // Print some information for the end-user. 
print_error!( "This is most likely a bug!\n\ Please report it to the snarkOS developers: https://github.com/ProvableHQ/snarkOS/issues/new?template=bug.md" diff --git a/utilities/Cargo.toml b/utilities/Cargo.toml new file mode 100644 index 0000000000..7ad655cf95 --- /dev/null +++ b/utilities/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "snarkos-utilities" +version = "4.2.1" +authors = [ "The Aleo Team " ] +description = "Utilities for a decentralized operating system" +homepage = "https://aleo.org" +repository = "https://github.com/ProvableHQ/snarkOS" +keywords = [ + "aleo", + "cryptography", + "blockchain", + "decentralized", + "zero-knowledge" +] +categories = [ "cryptography", "cryptography::cryptocurrencies", "os" ] +license = "Apache-2.0" +edition = "2024" + +[dependencies.tokio] +workspace = true +features = [ "macros", "signal" ] + +[dependencies.tracing] +workspace = true diff --git a/utilities/LICENSE.md b/utilities/LICENSE.md new file mode 100644 index 0000000000..d0af96c393 --- /dev/null +++ b/utilities/LICENSE.md @@ -0,0 +1,194 @@ +Apache License +============== + +_Version 2.0, January 2004_ +_<>_ + +### Terms and Conditions for use, reproduction, and distribution + +#### 1. Definitions + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, “control” means **(i)** the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the +outstanding shares, or **(iii)** beneficial ownership of such entity. 
+ +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +“submitted” means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +#### 2. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +#### 3. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +#### 4. Redistribution + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +* **(a)** You must give any other recipients of the Work or Derivative Works a copy of +this License; and +* **(b)** You must cause any modified files to carry prominent notices stating that You +changed the files; and +* **(c)** You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +* **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +#### 5. Submission of Contributions + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +#### 6. Trademarks + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +#### 7. Disclaimer of Warranty + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +#### 8. 
Limitation of Liability + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +#### 9. Accepting Warranty or Additional Liability + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +_END OF TERMS AND CONDITIONS_ + +### APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets `[]` replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same “printed page” as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs new file mode 100644 index 0000000000..2e53d8550f --- /dev/null +++ b/utilities/src/lib.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod signals; +pub use signals::*; diff --git a/utilities/src/signals.rs b/utilities/src/signals.rs new file mode 100644 index 0000000000..c2b0e9cd9c --- /dev/null +++ b/utilities/src/signals.rs @@ -0,0 +1,132 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at:
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{
+    Arc,
+    atomic::{AtomicBool, Ordering},
+};
+use tokio::sync::Notify;
+
+use tracing::{debug, error};
+
+/// Generic trait that can be queried for whether the current process should be stopped.
+/// This is implemented by `SignalHandler` and `SimpleStoppable`.
+pub trait Stoppable: Send + Sync {
+    fn stop(&self);
+    fn is_stopped(&self) -> bool;
+}
+
+/// Wrapper around `AtomicBool` that implements the `Stoppable` trait.
+///
+/// This is useful when no signal or complex shutdown handling is necessary.
+pub struct SimpleStoppable {
+    state: AtomicBool,
+}
+
+impl SimpleStoppable {
+    pub fn new() -> Arc<Self> {
+        Arc::new(Self { state: AtomicBool::new(false) })
+    }
+}
+
+impl Stoppable for SimpleStoppable {
+    fn stop(&self) {
+        self.state.store(true, Ordering::SeqCst);
+    }
+
+    fn is_stopped(&self) -> bool {
+        self.state.load(Ordering::SeqCst)
+    }
+}
+
+/// Helper for signal handling: sets a stop flag and wakes a waiter once a signal was received.
+pub struct SignalHandler {
+    stopped: AtomicBool,
+    notify: Notify,
+}
+
+impl SignalHandler {
+    pub fn new() -> Arc<Self> {
+        let obj = Arc::new(Self { stopped: AtomicBool::new(false), notify: Default::default() });
+
+        {
+            let obj = obj.clone();
+            tokio::spawn(async move {
+                obj.handle_signals().await;
+            });
+        }
+
+        obj
+    }
+
+    /// Background task that waits for a signal (or a listener error) and then calls `stop`.
+    async fn handle_signals(&self) {
+        #[cfg(target_family = "unix")]
+        let signal_listener = async move {
+            use tokio::signal::unix::{SignalKind, signal};
+
+            // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP.
+            let mut s_int = signal(SignalKind::interrupt())?;
+            let mut s_term = signal(SignalKind::terminate())?;
+            let mut s_quit = signal(SignalKind::quit())?;
+            let mut s_hup = signal(SignalKind::hangup())?;
+
+            tokio::select!(
+                _ = s_int.recv() => debug!("Received SIGINT"),
+                _ = s_term.recv() => debug!("Received SIGTERM"),
+                _ = s_quit.recv() => debug!("Received SIGQUIT"),
+                _ = s_hup.recv() => debug!("Received SIGHUP"),
+            );
+
+            std::io::Result::<()>::Ok(())
+        };
+
+        #[cfg(not(target_family = "unix"))]
+        let signal_listener = async move {
+            tokio::signal::ctrl_c().await?;
+            debug!("Got signal");
+            std::io::Result::<()>::Ok(())
+        };
+
+        // Block until the signal; a listener error is only logged, and `stop` is still called below.
+        match signal_listener.await {
+            Ok(()) => {}
+            Err(error) => {
+                error!("tokio::signal encountered an error: {error}");
+            }
+        }
+
+        self.stop();
+    }
+
+    /// Blocks until `stop` has been called, e.g., by the background signal task.
+    /// Note: This can only be called once, and must not be called concurrently.
+    pub async fn wait_for_signals(&self) {
+        while !self.is_stopped() {
+            self.notify.notified().await
+        }
+    }
+}
+
+impl Stoppable for SignalHandler {
+    fn stop(&self) {
+        self.stopped.store(true, Ordering::SeqCst);
+        self.notify.notify_one();
+    }
+
+    fn is_stopped(&self) -> bool {
+        self.stopped.load(Ordering::SeqCst)
+    }
+}