diff --git a/crates/hashi-guardian/src/enclave.rs b/crates/hashi-guardian/src/enclave.rs index 05c874f71..391105376 100644 --- a/crates/hashi-guardian/src/enclave.rs +++ b/crates/hashi-guardian/src/enclave.rs @@ -309,6 +309,7 @@ impl EnclaveState { pub async fn consume_from_limiter( &self, + wid: u64, seq: u64, timestamp: u64, amount_sats: u64, @@ -323,10 +324,53 @@ impl EnclaveState { ) .await .map_err(|_| InvalidInputs("timed out waiting for rate limiter lock".into()))?; - guard.consume(seq, timestamp, amount_sats)?; + guard.consume(wid, seq, timestamp, amount_sats)?; Ok(LimiterGuard::new(guard)) } + /// Soft-reserve headroom for `amount_sats` against `wid`. Idempotent: + /// repeat calls for the same wid return (and refresh) the existing + /// reservation. Reservations are dropped either by a matching + /// `consume_from_limiter` call or by a TTL sweep. + pub async fn soft_reserve( + &self, + wid: u64, + timestamp_secs: u64, + amount_sats: u64, + now_unix_secs: u64, + ) -> GuardianResult { + let rate_limiter = self + .rate_limiter + .get() + .ok_or_else(|| InvalidInputs("rate_limiter not initialized".into()))?; + let mut guard = tokio::time::timeout( + Self::LIMITER_LOCK_TIMEOUT, + rate_limiter.clone().lock_owned(), + ) + .await + .map_err(|_| InvalidInputs("timed out waiting for rate limiter lock".into()))?; + guard.soft_reserve(wid, timestamp_secs, amount_sats, now_unix_secs) + } + + /// Drop any soft reservation whose TTL has elapsed. Called periodically + /// from the guardian's background sweep task. + pub async fn expire_pending_reserves(&self, now_unix_secs: u64) -> usize { + let Some(rate_limiter) = self.rate_limiter.get() else { + return 0; + }; + // Use a short timeout — if the limiter is held longer than that by + // an in-flight withdrawal, we'll just try again next tick. + match tokio::time::timeout( + Self::LIMITER_LOCK_TIMEOUT, + rate_limiter.clone().lock_owned(), + ) + .await + { + Ok(mut guard) => guard.expire_pending(now_unix_secs), + Err(_) => 0, + } + } + /// Snapshot the current rate limiter state, if the limiter has been /// initialized. Used by `GetGuardianInfo` so that clients can seed their /// local `seq` counter at startup. diff --git a/crates/hashi-guardian/src/main.rs b/crates/hashi-guardian/src/main.rs index c130f4319..dcd1bc747 100644 --- a/crates/hashi-guardian/src/main.rs +++ b/crates/hashi-guardian/src/main.rs @@ -12,6 +12,7 @@ use hashi_types::guardian::GuardianEncKeyPair; use hashi_types::guardian::GuardianSignKeyPair; use hashi_types::proto::guardian_service_server::GuardianServiceServer; use std::sync::Arc; +use std::time::Duration; use tonic::transport::Server; use tonic_health::server::health_reporter; use tracing::info; @@ -61,9 +62,11 @@ async fn main() -> Result<()> { .add_service(GuardianServiceServer::new(svc)) .serve(addr); - let heartbeat_future = HeartbeatWriter::new(enclave, MAX_HEARTBEAT_FAILURES_INTERVAL) + let heartbeat_future = HeartbeatWriter::new(enclave.clone(), MAX_HEARTBEAT_FAILURES_INTERVAL) .run(HEARTBEAT_INTERVAL, HEARTBEAT_RETRY_INTERVAL); + let ttl_sweep_future = run_soft_reserve_sweep(enclave.clone()); + tokio::select! { res = server_future => { res.map_err(|e| anyhow::anyhow!("Server error: {}", e)) @@ -71,5 +74,27 @@ async fn main() -> Result<()> { res = heartbeat_future => { panic!("Heartbeat failed: {:?}", res) } + _ = ttl_sweep_future => { + unreachable!("soft-reserve sweep loop should run forever") + } + } +} + +/// Periodically drop soft reservations whose TTL has elapsed. Runs until +/// shutdown. Swept entries free up capacity for subsequent soft reserves. +async fn run_soft_reserve_sweep(enclave: Arc) -> ! { + const SWEEP_INTERVAL: Duration = Duration::from_secs(1); + let mut ticker = tokio::time::interval(SWEEP_INTERVAL); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + ticker.tick().await; + let now = hashi_types::guardian::now_timestamp_secs(); + let removed = enclave.state.expire_pending_reserves(now).await; + if removed > 0 { + info!( + removed, + "soft-reserve TTL sweep dropped {removed} expired reservations" + ); + } } } diff --git a/crates/hashi-guardian/src/rpc.rs b/crates/hashi-guardian/src/rpc.rs index 8cdb4d5c1..b7e135b69 100644 --- a/crates/hashi-guardian/src/rpc.rs +++ b/crates/hashi-guardian/src/rpc.rs @@ -135,4 +135,39 @@ impl proto::guardian_service_server::GuardianService for GuardianGrpc { let resp_pb = proto_conversions::standard_withdrawal_response_signed_to_pb(response); Ok(Response::new(resp_pb)) } + + async fn soft_reserve_withdrawal( + &self, + request: Request, + ) -> Result, Status> { + if self.setup_mode { + return Err(Status::failed_precondition( + "soft_reserve_withdrawal is disabled when SETUP_MODE=true", + )); + } + + let req = request.into_inner(); + let wid = req + .wid + .ok_or_else(|| Status::invalid_argument("wid is required"))?; + let amount_sats = req + .amount_sats + .ok_or_else(|| Status::invalid_argument("amount_sats is required"))?; + let timestamp_secs = req + .timestamp_secs + .ok_or_else(|| Status::invalid_argument("timestamp_secs is required"))?; + + let reserve = withdraw::soft_reserve_withdrawal( + self.enclave.clone(), + wid, + timestamp_secs, + amount_sats, + ) + .await + .map_err(to_status)?; + + Ok(Response::new(proto::SoftReserveWithdrawalResponse { + expires_at_secs: Some(reserve.expires_at_secs), + })) + } } diff --git a/crates/hashi-guardian/src/withdraw.rs b/crates/hashi-guardian/src/withdraw.rs index 68c07c7c3..4a0a2bb8d 100644 --- a/crates/hashi-guardian/src/withdraw.rs +++ b/crates/hashi-guardian/src/withdraw.rs @@ -12,6 +12,7 @@ use hashi_types::guardian::GuardianResult; use hashi_types::guardian::GuardianSigned; use hashi_types::guardian::HashiCommittee; use hashi_types::guardian::HashiSigned; +use hashi_types::guardian::PendingReserve; use hashi_types::guardian::RateLimiter; use hashi_types::guardian::StandardWithdrawalRequest; use hashi_types::guardian::StandardWithdrawalRequestWire; @@ -67,6 +68,60 @@ pub async fn standard_withdrawal( } } +/// Light-touch pre-construction rate-limit probe. +/// +/// Idempotent on `wid`: callers may re-submit the same wid from any node +/// at any time to learn whether capacity is available. A successful +/// reserve debits against future `soft_reserve` capacity checks but does +/// NOT advance `next_seq` or `last_updated_at`. The reservation is +/// dropped either by a matching `standard_withdrawal` commit or by the +/// TTL sweep. +/// +/// Unlike `standard_withdrawal`, soft reserve does not require a +/// committee certificate — the 5-minute TTL bounds the DoS blast radius +/// of a rogue or buggy caller to at most one bucket's worth of headroom. +pub async fn soft_reserve_withdrawal( + enclave: Arc, + wid: u64, + timestamp_secs: u64, + amount_sats: u64, +) -> GuardianResult { + info!(wid, amount_sats, "soft reserve"); + + if !enclave.is_fully_initialized() { + return Err(EnclaveUninitialized); + } + + // A wid that has already been fully processed has no meaning in the + // pending queue; short-circuit to avoid confusing callers. + if let Some(_cached) = enclave.state.get_cached_response(wid) { + // Already hard-reserved: nothing to pend. Return a sentinel + // reservation pointing at "now" so callers can proceed to the + // hard reserve (which will hit the idempotency cache). + let now = now_timestamp_secs(); + return Ok(PendingReserve { + amount_sats, + timestamp_secs, + expires_at_secs: now, + }); + } + + // Reject timestamps too far in the future (clock skew protection). + const MAX_CLOCK_SKEW_SECS: u64 = 5 * 60; + let guardian_now = now_timestamp_secs(); + if timestamp_secs > guardian_now + MAX_CLOCK_SKEW_SECS { + return Err(InvalidInputs(format!( + "soft reserve timestamp {timestamp_secs} is too far in the future \ + (guardian clock: {guardian_now})" + ))); + } + + enclave + .state + .soft_reserve(wid, timestamp_secs, amount_sats, guardian_now) + .await +} + // TODO: Support batched withdrawals (multiple wids per transaction). async fn normal_withdrawal_inner( enclave: Arc, @@ -109,9 +164,11 @@ async fn normal_withdrawal_inner( info!("Checking rate limits."); let consumed_amount_sats = request.utxos().external_out_amount().to_sat(); + let wid = *request.wid(); let limiter_guard = enclave .state .consume_from_limiter( + wid, request.seq(), request.timestamp_secs(), consumed_amount_sats, diff --git a/crates/hashi-types/proto/sui/hashi/v1alpha/guardian_service.proto b/crates/hashi-types/proto/sui/hashi/v1alpha/guardian_service.proto index 9d47b4d98..4079d790e 100644 --- a/crates/hashi-types/proto/sui/hashi/v1alpha/guardian_service.proto +++ b/crates/hashi-types/proto/sui/hashi/v1alpha/guardian_service.proto @@ -18,6 +18,14 @@ service GuardianService { // Provisioner initialization: submit encrypted share and readiness info. rpc ProvisionerInit(ProvisionerInitRequest) returns (ProvisionerInitResponse); + // Soft-reserve a withdrawal slot (light-touch pre-construction check). + // Any node may call this before committing an on-chain withdrawal tx to + // learn whether rate-limit headroom exists for `amount_sats`. Soft + // reserves are idempotent on `wid`, do not debit the bucket, and expire + // after a server-enforced TTL (so a rogue or crashed caller cannot lock + // capacity indefinitely). + rpc SoftReserveWithdrawal(SoftReserveWithdrawalRequest) returns (SoftReserveWithdrawalResponse); + // Standard withdrawal: request immediate withdrawal signature. rpc StandardWithdrawal(SignedStandardWithdrawalRequest) returns (SignedStandardWithdrawalResponse); } @@ -34,8 +42,10 @@ message GetGuardianInfoResponse { // Signed guardian info (includes server version, encryption pubkey, and optional S3/bucket info). SignedGuardianInfo signed_info = 3; - // Current rate limiter state (if initialized). Lets clients seed their - // local seq counter at startup so it survives restarts and leader rotations. + // Current rate limiter state (if initialized). Clients query this + // just-in-time per withdrawal to read `next_seq`; the guardian's + // wid-keyed response cache makes repeated attempts idempotent so no + // client-side seq bookkeeping is needed. LimiterState limiter_state = 4; } @@ -249,3 +259,33 @@ message StandardWithdrawalResponseData { // Bitcoin signatures for each input (64 bytes each, Schnorr signatures). repeated bytes enclave_signatures = 1; } + +// ============================ +// SoftReserveWithdrawal +// ============================ + +// Pre-construction rate-limit check. Soft reserves are idempotent on +// `wid`: re-submitting the same wid extends / returns the existing +// reservation. The guardian does not debit the bucket until a matching +// StandardWithdrawal lands (or the reservation TTL elapses). +message SoftReserveWithdrawalRequest { + // Deterministic withdrawal identifier — same as + // `StandardWithdrawalRequestData.wid`. Derived on the client from the + // on-chain request ids so any validator converges on the same value. + optional uint64 wid = 1; + + // Upper-bound amount in satoshis being reserved (pre-fee sum of the + // batched user-facing withdrawal amounts). + optional uint64 amount_sats = 2; + + // Unix seconds timestamp for the reserve, monotonically non-decreasing + // across successful soft+hard reserves. Clients use the most recent + // Sui checkpoint clock. + optional uint64 timestamp_secs = 3; +} + +message SoftReserveWithdrawalResponse { + // Unix seconds at which this reservation will be garbage-collected if + // no matching StandardWithdrawal has committed it by then. + optional uint64 expires_at_secs = 1; +} diff --git a/crates/hashi-types/src/guardian/limiter.rs b/crates/hashi-types/src/guardian/limiter.rs index 9dc3f88fb..3b8deef82 100644 --- a/crates/hashi-types/src/guardian/limiter.rs +++ b/crates/hashi-types/src/guardian/limiter.rs @@ -5,6 +5,13 @@ use super::GuardianError::InvalidInputs; use super::GuardianError::RateLimitExceeded; use super::GuardianResult; use serde::Serialize; +use std::collections::HashMap; + +/// How long a soft reservation is held before the guardian garbage +/// collects it. Long enough to cover committee fan-out + MPC signing +/// on any realistic path; short enough that a crashed caller does not +/// lock capacity for operators to notice and intervene. +pub const SOFT_RESERVE_TTL_SECS: u64 = 5 * 60; /// Immutable configuration for the token bucket rate limiter. #[derive(Debug, Copy, Clone, PartialEq, Serialize)] @@ -27,12 +34,29 @@ pub struct LimiterState { pub next_seq: u64, } +/// An in-flight soft reservation. Held by the guardian until either a +/// matching `consume` converts it to a hard reserve or the TTL expires. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct PendingReserve { + /// Amount of sats soft-reserved. + pub amount_sats: u64, + /// Timestamp of the reserving request (unix seconds). + pub timestamp_secs: u64, + /// Unix-seconds wall-clock at which the guardian will drop this entry + /// if still present. + pub expires_at_secs: u64, +} + /// Token bucket rate limiter. Tokens refill linearly over time. /// /// Pure data structure — concurrency is handled by the caller via a Mutex. pub struct RateLimiter { config: LimiterConfig, state: LimiterState, + /// In-flight soft reservations keyed by `wid`. Idempotent on wid. + /// Subtracted from refilled capacity on new soft-reserve capacity + /// checks so concurrent reservations do not over-commit the bucket. + pending_reserves: HashMap, /// Snapshot of state before the most recent `consume`, used for revert. prev_state: LimiterState, } @@ -48,6 +72,7 @@ impl RateLimiter { Ok(Self { config, state, + pending_reserves: HashMap::new(), prev_state, }) } @@ -64,9 +89,112 @@ impl RateLimiter { self.state.next_seq } + /// Number of pending soft reservations (exposed for metrics/tests). + pub fn pending_reserves_len(&self) -> usize { + self.pending_reserves.len() + } + + /// Look up a pending soft reservation by wid. + pub fn pending_reserve(&self, wid: u64) -> Option<&PendingReserve> { + self.pending_reserves.get(&wid) + } + + /// Effective "last updated at" — the max of the committed + /// `last_updated_at` and the timestamps of all outstanding soft + /// reservations. New requests must supply a timestamp at least this + /// large so the refill window is monotonic. + pub fn effective_last_updated_at(&self) -> u64 { + let latest_pending = self + .pending_reserves + .values() + .map(|r| r.timestamp_secs) + .max() + .unwrap_or(0); + self.state.last_updated_at.max(latest_pending) + } + + /// Reserve `amount_sats` for `wid` pending a future `consume`. + /// + /// Idempotent on wid: a repeat call with an already-pending wid + /// returns the existing reservation unchanged. This makes it safe + /// for multiple validators to probe the guardian concurrently. + /// + /// `now_unix_secs` is the guardian's wall clock used to compute the + /// TTL deadline; the authoritative refill check uses the + /// caller-supplied `timestamp_secs` (monotonic across the Sui + /// clock). + pub fn soft_reserve( + &mut self, + wid: u64, + timestamp_secs: u64, + amount_sats: u64, + now_unix_secs: u64, + ) -> GuardianResult { + // Idempotency: re-probing with the same wid returns the same + // reservation. Extend its TTL so legitimate retries don't race + // with expiry. + if let Some(existing) = self.pending_reserves.get_mut(&wid) { + existing.expires_at_secs = now_unix_secs.saturating_add(SOFT_RESERVE_TTL_SECS); + return Ok(*existing); + } + + let effective_last = self.effective_last_updated_at(); + if timestamp_secs < effective_last { + return Err(InvalidInputs(format!( + "timestamp {timestamp_secs} < effective_last_updated_at {effective_last}" + ))); + } + + let capacity = self.capacity_at(timestamp_secs); + if capacity < amount_sats { + return Err(RateLimitExceeded); + } + + let reserve = PendingReserve { + amount_sats, + timestamp_secs, + expires_at_secs: now_unix_secs.saturating_add(SOFT_RESERVE_TTL_SECS), + }; + self.pending_reserves.insert(wid, reserve); + Ok(reserve) + } + + /// Drop any pending reserve whose TTL has elapsed at `now_unix_secs`. + /// Returns the number of entries removed (for tests + metrics). + pub fn expire_pending(&mut self, now_unix_secs: u64) -> usize { + let before = self.pending_reserves.len(); + self.pending_reserves + .retain(|_, r| r.expires_at_secs > now_unix_secs); + before - self.pending_reserves.len() + } + + /// Capacity at `timestamp` accounting for refill since + /// `last_updated_at` AND outstanding soft reserves. Used by + /// `soft_reserve` to reject over-commitment. + fn capacity_at(&self, timestamp_secs: u64) -> u64 { + let elapsed = timestamp_secs.saturating_sub(self.state.last_updated_at); + let refilled = elapsed.saturating_mul(self.config.refill_rate); + let base = self + .state + .num_tokens_available + .saturating_add(refilled) + .min(self.config.max_bucket_capacity); + let pending_sum: u64 = self.pending_reserves.values().map(|r| r.amount_sats).sum(); + base.saturating_sub(pending_sum) + } + /// Consume tokens from the bucket. Validates seq and timestamp ordering, /// refills based on elapsed time, then debits the requested amount. - pub fn consume(&mut self, seq: u64, timestamp: u64, amount_sats: u64) -> GuardianResult<()> { + /// + /// If a soft reservation exists for `wid`, it is removed as part of + /// the consume (converting the soft reserve to a hard reserve). + pub fn consume( + &mut self, + wid: u64, + seq: u64, + timestamp: u64, + amount_sats: u64, + ) -> GuardianResult<()> { if seq != self.state.next_seq { return Err(InvalidInputs(format!( "seq mismatch: expected {}, got {}", @@ -95,8 +223,12 @@ impl RateLimiter { return Err(RateLimitExceeded); } - // Snapshot for revert, then mutate. + // Snapshot for revert, then mutate. The matching soft reservation + // (if any) is removed on success; on revert it would otherwise + // double-count against future capacity, so we drop it here + // regardless. self.prev_state = self.state; + self.pending_reserves.remove(&wid); self.state.last_updated_at = timestamp; self.state.num_tokens_available = capacity - amount_sats; self.state.next_seq += 1; @@ -131,18 +263,18 @@ mod test { fn test_basic() { let (config, state) = make_limiter(); let mut limiter = RateLimiter::new(config, state).unwrap(); - assert!(limiter.consume(0, 1, config.refill_rate).is_ok()); + assert!(limiter.consume(1, 0, 1, config.refill_rate).is_ok()); let target_amount = 1_000_000u64; let num_secs_required = target_amount.div_ceil(config.refill_rate); assert!( limiter - .consume(1, num_secs_required, target_amount) + .consume(2, 1, num_secs_required, target_amount) .is_err() ); assert!( limiter - .consume(1, 1 + num_secs_required, target_amount) + .consume(2, 1, 1 + num_secs_required, target_amount) .is_ok() ); } @@ -153,12 +285,12 @@ mod test { let mut limiter = RateLimiter::new(config, state).unwrap(); assert!( limiter - .consume(0, u64::MAX, config.max_bucket_capacity + 1) + .consume(1, 0, u64::MAX, config.max_bucket_capacity + 1) .is_err() ); assert!( limiter - .consume(0, u64::MAX, config.max_bucket_capacity) + .consume(1, 0, u64::MAX, config.max_bucket_capacity) .is_ok() ); } @@ -168,7 +300,7 @@ mod test { let (config, state) = make_limiter(); let mut limiter = RateLimiter::new(config, state).unwrap(); // Consume after refill, then revert — should restore original state. - limiter.consume(0, 100, 50_000).unwrap(); + limiter.consume(1, 0, 100, 50_000).unwrap(); assert_eq!(limiter.state().num_tokens_available, 50_000); // 100*1000 - 50_000 limiter.revert(); assert_eq!(limiter.state().num_tokens_available, 0); @@ -181,10 +313,103 @@ mod test { let (config, state) = make_limiter(); let mut limiter = RateLimiter::new(config, state).unwrap(); // Wrong seq. - assert!(limiter.consume(1, 0, 0).is_err()); + assert!(limiter.consume(1, 1, 0, 0).is_err()); // Advance state. - limiter.consume(0, 100, 1_000).unwrap(); + limiter.consume(1, 0, 100, 1_000).unwrap(); // Old timestamp. - assert!(limiter.consume(1, 50, 1_000).is_err()); + assert!(limiter.consume(2, 1, 50, 1_000).is_err()); + } + + // ============================ + // Soft reserve behavior + // ============================ + + #[test] + fn test_soft_reserve_is_idempotent_on_wid() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + // First soft reserve at t=100 for 50k sats, 100*1000=100k refill. + let r1 = limiter.soft_reserve(42, 100, 50_000, 1_000).unwrap(); + assert_eq!(r1.amount_sats, 50_000); + assert_eq!(limiter.pending_reserves_len(), 1); + + // Idempotent: same wid returns the same amount/timestamp; TTL refreshed. + let r2 = limiter.soft_reserve(42, 200, 999_999, 2_000).unwrap(); + assert_eq!(limiter.pending_reserves_len(), 1); + assert_eq!(r2.amount_sats, r1.amount_sats); + assert_eq!(r2.timestamp_secs, r1.timestamp_secs); + assert!(r2.expires_at_secs > r1.expires_at_secs); + } + + #[test] + fn test_soft_reserve_rejects_over_commitment_across_wids() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + // Capacity at t=100 = 100k. First reserve takes 80k. + limiter.soft_reserve(1, 100, 80_000, 1_000).unwrap(); + + // Second distinct wid only has 20k left. + assert!(matches!( + limiter.soft_reserve(2, 100, 30_000, 1_000), + Err(RateLimitExceeded) + )); + assert!(limiter.soft_reserve(2, 100, 20_000, 1_000).is_ok()); + } + + #[test] + fn test_soft_reserve_enforces_monotonic_timestamp() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + limiter.soft_reserve(1, 200, 10_000, 1_000).unwrap(); + // A soft reserve older than the latest pending timestamp is rejected. + assert!(limiter.soft_reserve(2, 150, 10_000, 1_000).is_err()); + } + + #[test] + fn test_expire_pending_drops_stale_reservations() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + limiter.soft_reserve(1, 100, 1_000, 1_000).unwrap(); + assert_eq!(limiter.pending_reserves_len(), 1); + + // Just before TTL — nothing expires. + let removed = limiter.expire_pending(1_000 + SOFT_RESERVE_TTL_SECS - 1); + assert_eq!(removed, 0); + assert_eq!(limiter.pending_reserves_len(), 1); + + // Past TTL — entry dropped. + let removed = limiter.expire_pending(1_000 + SOFT_RESERVE_TTL_SECS + 1); + assert_eq!(removed, 1); + assert_eq!(limiter.pending_reserves_len(), 0); + } + + #[test] + fn test_consume_removes_matching_soft_reserve() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + limiter.soft_reserve(42, 100, 50_000, 1_000).unwrap(); + assert_eq!(limiter.pending_reserves_len(), 1); + + limiter.consume(42, 0, 100, 50_000).unwrap(); + assert_eq!(limiter.pending_reserves_len(), 0); + assert_eq!(limiter.state().num_tokens_available, 50_000); // 100k refill - 50k + } + + #[test] + fn test_soft_reserve_leaves_room_for_hard_reserve_of_same_wid() { + let (config, state) = make_limiter(); + let mut limiter = RateLimiter::new(config, state).unwrap(); + + // Reserve all available headroom for a single wid. + limiter.soft_reserve(42, 100, 100_000, 1_000).unwrap(); + + // Hard reserve for the same wid converts the pending reservation. + assert!(limiter.consume(42, 0, 100, 100_000).is_ok()); + assert_eq!(limiter.pending_reserves_len(), 0); } } diff --git a/crates/hashi-types/src/guardian/mod.rs b/crates/hashi-types/src/guardian/mod.rs index 8cce0d159..144b5e13f 100644 --- a/crates/hashi-types/src/guardian/mod.rs +++ b/crates/hashi-types/src/guardian/mod.rs @@ -13,7 +13,9 @@ pub mod s3_utils; pub use limiter::LimiterConfig; pub use limiter::LimiterState; +pub use limiter::PendingReserve; pub use limiter::RateLimiter; +pub use limiter::SOFT_RESERVE_TTL_SECS; pub use time_utils::UnixMillis; pub use time_utils::now_timestamp_ms; pub use time_utils::now_timestamp_secs; diff --git a/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.fds.bin b/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.fds.bin index 71e80f453..5ee0573c0 100644 Binary files a/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.fds.bin and b/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.fds.bin differ diff --git a/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.rs b/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.rs index 162611bdc..b5248e4da 100644 --- a/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.rs +++ b/crates/hashi-types/src/proto/generated/sui.hashi.v1alpha.rs @@ -1122,8 +1122,10 @@ pub struct GetGuardianInfoResponse { /// Signed guardian info (includes server version, encryption pubkey, and optional S3/bucket info). #[prost(message, optional, tag = "3")] pub signed_info: ::core::option::Option, - /// Current rate limiter state (if initialized). Lets clients seed their - /// local seq counter at startup so it survives restarts and leader rotations. + /// Current rate limiter state (if initialized). Clients query this + /// just-in-time per withdrawal to read `next_seq`; the guardian's + /// wid-keyed response cache makes repeated attempts idempotent so no + /// client-side seq bookkeeping is needed. #[prost(message, optional, tag = "4")] pub limiter_state: ::core::option::Option, } @@ -1344,6 +1346,34 @@ pub struct StandardWithdrawalResponseData { #[prost(bytes = "bytes", repeated, tag = "1")] pub enclave_signatures: ::prost::alloc::vec::Vec<::prost::bytes::Bytes>, } +/// Pre-construction rate-limit check. Soft reserves are idempotent on +/// `wid`: re-submitting the same wid extends / returns the existing +/// reservation. The guardian does not debit the bucket until a matching +/// StandardWithdrawal lands (or the reservation TTL elapses). +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SoftReserveWithdrawalRequest { + /// Deterministic withdrawal identifier — same as + /// `StandardWithdrawalRequestData.wid`. Derived on the client from the + /// on-chain request ids so any validator converges on the same value. + #[prost(uint64, optional, tag = "1")] + pub wid: ::core::option::Option, + /// Upper-bound amount in satoshis being reserved (pre-fee sum of the + /// batched user-facing withdrawal amounts). + #[prost(uint64, optional, tag = "2")] + pub amount_sats: ::core::option::Option, + /// Unix seconds timestamp for the reserve, monotonically non-decreasing + /// across successful soft+hard reserves. Clients use the most recent + /// Sui checkpoint clock. + #[prost(uint64, optional, tag = "3")] + pub timestamp_secs: ::core::option::Option, +} +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SoftReserveWithdrawalResponse { + /// Unix seconds at which this reservation will be garbage-collected if + /// no matching StandardWithdrawal has committed it by then. + #[prost(uint64, optional, tag = "1")] + pub expires_at_secs: ::core::option::Option, +} #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum Network { @@ -1581,6 +1611,41 @@ pub mod guardian_service_client { ); self.inner.unary(req, path, codec).await } + /// Soft-reserve a withdrawal slot (light-touch pre-construction check). + /// Any node may call this before committing an on-chain withdrawal tx to + /// learn whether rate-limit headroom exists for `amount_sats`. Soft + /// reserves are idempotent on `wid`, do not debit the bucket, and expire + /// after a server-enforced TTL (so a rogue or crashed caller cannot lock + /// capacity indefinitely). + pub async fn soft_reserve_withdrawal( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/sui.hashi.v1alpha.GuardianService/SoftReserveWithdrawal", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new( + "sui.hashi.v1alpha.GuardianService", + "SoftReserveWithdrawal", + ), + ); + self.inner.unary(req, path, codec).await + } /// Standard withdrawal: request immediate withdrawal signature. pub async fn standard_withdrawal( &mut self, @@ -1658,6 +1723,19 @@ pub mod guardian_service_server { tonic::Response, tonic::Status, >; + /// Soft-reserve a withdrawal slot (light-touch pre-construction check). + /// Any node may call this before committing an on-chain withdrawal tx to + /// learn whether rate-limit headroom exists for `amount_sats`. Soft + /// reserves are idempotent on `wid`, do not debit the bucket, and expire + /// after a server-enforced TTL (so a rogue or crashed caller cannot lock + /// capacity indefinitely). + async fn soft_reserve_withdrawal( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; /// Standard withdrawal: request immediate withdrawal signature. async fn standard_withdrawal( &self, @@ -1925,6 +2003,55 @@ pub mod guardian_service_server { }; Box::pin(fut) } + "/sui.hashi.v1alpha.GuardianService/SoftReserveWithdrawal" => { + #[allow(non_camel_case_types)] + struct SoftReserveWithdrawalSvc(pub Arc); + impl< + T: GuardianService, + > tonic::server::UnaryService + for SoftReserveWithdrawalSvc { + type Response = super::SoftReserveWithdrawalResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::soft_reserve_withdrawal( + &inner, + request, + ) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = SoftReserveWithdrawalSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } "/sui.hashi.v1alpha.GuardianService/StandardWithdrawal" => { #[allow(non_camel_case_types)] struct StandardWithdrawalSvc(pub Arc); diff --git a/crates/hashi/src/grpc/guardian_client.rs b/crates/hashi/src/grpc/guardian_client.rs index 1b23bdd8e..443b505e5 100644 --- a/crates/hashi/src/grpc/guardian_client.rs +++ b/crates/hashi/src/grpc/guardian_client.rs @@ -67,4 +67,15 @@ impl GuardianClient { .await?; Ok(response.into_inner()) } + + pub async fn soft_reserve_withdrawal( + &self, + request: hashi_types::proto::SoftReserveWithdrawalRequest, + ) -> Result { + let response = self + .guardian_service_client() + .soft_reserve_withdrawal(request) + .await?; + Ok(response.into_inner()) + } } diff --git a/crates/hashi/src/leader/mod.rs b/crates/hashi/src/leader/mod.rs index f7b3249c4..8b24155c4 100644 --- a/crates/hashi/src/leader/mod.rs +++ b/crates/hashi/src/leader/mod.rs @@ -41,6 +41,7 @@ use hashi_types::proto::SignWithdrawalRequestApprovalRequest; use hashi_types::proto::SignWithdrawalTransactionRequest; use hashi_types::proto::SignWithdrawalTxConstructionRequest; use hashi_types::proto::SignWithdrawalTxSigningRequest; +use hashi_types::proto::SoftReserveWithdrawalRequest; use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -990,6 +991,25 @@ impl LeaderService { } }; + // Soft reserve against the guardian BEFORE the expensive committee + // BLS fan-out and on-chain commit. Idempotent on `wid` (derived + // deterministically from the selected request_ids) and expires on + // the guardian after a bounded TTL, so a crash or network hiccup + // does not permanently lock capacity. No-op when `guardian_client` + // is unconfigured. + if let Err(e) = Self::soft_reserve_withdrawal_through_guardian( + &inner, + &approval, + checkpoint_timestamp_ms, + ) + .await + { + // Rate limiter or guardian unavailable — skip this iteration; + // the leader will retry on the next tick. + warn!("Soft reserve failed: {e:#}"); + return Ok(()); + } + // Fan out to committee for BLS signatures over the commitment message let members = inner .onchain_state() @@ -1741,6 +1761,80 @@ impl LeaderService { } } + // ======================================================================== + // Guardian: rate-limiting soft reserve (runs before committee fan-out + // for commit_withdrawal_tx) + // ======================================================================== + + /// Probe the guardian for rate-limit headroom before we invest in the + /// committee BLS fan-out + on-chain commit. Idempotent on wid so + /// concurrent probes from multiple leaders converge to the same + /// reservation, and TTL-bounded so a crash does not permanently lock + /// capacity. + /// + /// Returns `Ok(())` on a fresh or refreshed reservation, `Err` when + /// the guardian rejects (rate-limited / unavailable / uninitialized) — + /// the caller should skip the iteration and let the next leader tick + /// retry. + #[tracing::instrument(level = "info", skip_all, fields( + request_count = approval.request_ids.len(), + wid + ))] + async fn soft_reserve_withdrawal_through_guardian( + inner: &Arc, + approval: &WithdrawalTxCommitment, + checkpoint_timestamp_ms: u64, + ) -> anyhow::Result<()> { + let Some(guardian) = inner.guardian_client() else { + return Ok(()); + }; + + let wid = crate::withdrawals::compute_withdrawal_wid(&approval.request_ids); + tracing::Span::current().record("wid", wid); + + // External-out amount: the first N outputs (one per selected + // request) pay the user; any trailing output is change to the + // bridge. The guardian debits the external sum. + let num_requests = approval.request_ids.len(); + let amount_sats: u64 = approval + .outputs + .iter() + .take(num_requests) + .map(|o| o.amount) + .sum(); + + let timestamp_secs = checkpoint_timestamp_ms / 1000; + let proto_request = SoftReserveWithdrawalRequest { + wid: Some(wid), + amount_sats: Some(amount_sats), + timestamp_secs: Some(timestamp_secs), + }; + + match guardian.soft_reserve_withdrawal(proto_request).await { + Ok(response) => { + info!( + amount_sats, + expires_at_secs = response.expires_at_secs.unwrap_or(0), + "Guardian soft-reserved withdrawal", + ); + Ok(()) + } + Err(status) => { + let label = if status.message().contains("Rate limit exceeded") { + "GuardianRateLimited" + } else { + "GuardianUnavailable" + }; + inner + .metrics + .leader_retries_total + .with_label_values(&["withdrawal_commitment", label]) + .inc(); + anyhow::bail!("Guardian soft reserve rejected: {}", status.message()) + } + } + } + // ======================================================================== // Guardian: rate-limiting hard reserve (runs after MPC signing, before // submitting sign_withdrawal on-chain) diff --git a/crates/hashi/src/withdrawals.rs b/crates/hashi/src/withdrawals.rs index 4cf754c9c..c4abf1cae 100644 --- a/crates/hashi/src/withdrawals.rs +++ b/crates/hashi/src/withdrawals.rs @@ -1315,11 +1315,7 @@ pub fn build_guardian_withdrawal_request( let utxos = TxUTXOs::new(inputs, outputs) .map_err(|e| anyhow!("Failed to build guardian TxUTXOs: {e}"))?; - // Deterministic `wid`: leading 8 bytes of Blake2b256 over the BCS-encoded - // request ids. Stable across restarts and leader rotations. - let wid_bytes = bcs::to_bytes(&txn.request_ids).expect("serialization should succeed"); - let wid_hash = Blake2b256::digest(&wid_bytes); - let wid = u64::from_le_bytes(wid_hash.digest[..8].try_into().unwrap()); + let wid = compute_withdrawal_wid(&txn.request_ids); Ok(hashi_types::guardian::StandardWithdrawalRequest::new( wid, @@ -1328,3 +1324,14 @@ pub fn build_guardian_withdrawal_request( seq, )) } + +/// Deterministic withdrawal identifier — the leading 8 bytes of +/// Blake2b256 over the BCS-encoded request ids. Stable across restarts +/// and leader rotations, so any node converges on the same wid for the +/// same request set. Used as the idempotency key for soft reserves and +/// the hard reserve, and as the cache key on the guardian side. +pub fn compute_withdrawal_wid(request_ids: &[Address]) -> u64 { + let bytes = bcs::to_bytes(&request_ids).expect("serialization should succeed"); + let hash = Blake2b256::digest(&bytes); + u64::from_le_bytes(hash.digest[..8].try_into().unwrap()) +}