diff --git a/Cargo.toml b/Cargo.toml index be97417d..e7edd254 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,10 +35,10 @@ itertools = "0.14.0" # Use halo2curves ASM on x86_64 by default; disable ASM on non-x86_64 [target.'cfg(target_arch = "x86_64")'.dependencies] -halo2curves = { version = "0.9.0", features = ["std", "bits", "derive_serde", "asm"] } +halo2curves = { version = "0.9.0", features = ["std", "bits", "derive_serde", "asm", "bn256-table"] } [target.'cfg(not(target_arch = "x86_64"))'.dependencies] -halo2curves = { version = "0.9.0", features = ["std", "bits", "derive_serde"] } +halo2curves = { version = "0.9.0", features = ["std", "bits", "derive_serde", "bn256-table"] } [target.'cfg(target_arch = "wasm32")'.dependencies] getrandom = { version = "0.2.15", default-features = false, features = ["js"] } diff --git a/src/provider/bn256_grumpkin.rs b/src/provider/bn256_grumpkin.rs index 479c06fd..a44888ff 100644 --- a/src/provider/bn256_grumpkin.rs +++ b/src/provider/bn256_grumpkin.rs @@ -2,7 +2,7 @@ use crate::{ impl_traits, provider::{ - msm::{msm, msm_small, msm_small_with_max_num_bits}, + msm::{msm, msm_signed, msm_small, msm_small_with_max_num_bits}, traits::{DlogGroup, DlogGroupExt, PairingGroup}, }, traits::{Group, PrimeFieldExt, TranscriptReprTrait}, @@ -63,6 +63,10 @@ impl DlogGroupExt for bn256::Point { msm_small_with_max_num_bits(scalars, bases, max_num_bits) } + fn vartime_multiscalar_mul_signed(scalars: &[i128], bases: &[Self::AffineGroupElement]) -> Self { + msm_signed(scalars, bases) + } + #[cfg(feature = "blitzar")] fn vartime_multiscalar_mul(scalars: &[Self::Scalar], bases: &[Self::AffineGroupElement]) -> Self { super::blitzar::vartime_multiscalar_mul(scalars, bases) diff --git a/src/provider/hyperkzg.rs b/src/provider/hyperkzg.rs index fe13c6c8..efd049e5 100644 --- a/src/provider/hyperkzg.rs +++ b/src/provider/hyperkzg.rs @@ -12,7 +12,7 @@ use crate::{ errors::NovaError, gadgets::utils::to_bignat_repr, provider::{ - msm::batch_add, + msm::{batch_add, batch_add_address_grouped, batch_add_multi_tree}, traits::{DlogGroup, DlogGroupExt, PairingGroup}, }, traits::{ @@ -692,6 +692,47 @@ where Commitment { comm: res } } + fn commit_signed(ck: &Self::CommitmentKey, v: &[i128], r: &E::Scalar) -> Self::Commitment { + assert!(ck.ck.len() >= v.len()); + Commitment { + comm: E::GE::vartime_multiscalar_mul_signed(v, &ck.ck[..v.len()]) + + ::group(&ck.h) * r, + } + } + + fn commit_sparse_signed(ck: &Self::CommitmentKey, v: &[i128], r: &E::Scalar) -> Self::Commitment { + assert!(ck.ck.len() >= v.len()); + let nz_count = v.iter().filter(|&&x| x != 0).count(); + // Only use sparse path when < 25% non-zero (base copy overhead is significant) + if nz_count == 0 { + let mut comm = E::GE::zero(); + if r != &E::Scalar::ZERO { + comm += ::group(&ck.h) * r; + } + return Commitment { comm }; + } + if nz_count > v.len() / 4 { + return Self::commit_signed(ck, v, r); + } + // Extract sparse entries + let mut sparse_scalars = Vec::with_capacity(nz_count); + let mut sparse_bases = Vec::with_capacity(nz_count); + for (i, &val) in v.iter().enumerate() { + if val != 0 { + sparse_scalars.push(val); + sparse_bases.push(ck.ck[i]); + } + } + Commitment { + comm: E::GE::vartime_multiscalar_mul_signed(&sparse_scalars, &sparse_bases) + + if r != &E::Scalar::ZERO { + ::group(&ck.h) * r + } else { + E::GE::zero() + }, + } + } + fn ck_to_coordinates(ck: &Self::CommitmentKey) -> Vec<(E::Base, E::Base)> { ck.to_coordinates() } @@ -710,6 +751,38 @@ where .collect() } + fn commit_sparse(ck: &Self::CommitmentKey, 
v: &[E::Scalar], r: &E::Scalar) -> Self::Commitment { + assert!(ck.ck.len() >= v.len()); + let nz_count = v.iter().filter(|&&x| x != E::Scalar::ZERO).count(); + if nz_count == 0 { + let mut comm = E::GE::zero(); + if r != &E::Scalar::ZERO { + comm += ::group(&ck.h) * r; + } + return Commitment { comm }; + } + // Only use sparse path when < 50% non-zero (break-even with base copy overhead) + if nz_count > v.len() / 2 { + return Self::commit(ck, v, r); + } + let mut sparse_scalars = Vec::with_capacity(nz_count); + let mut sparse_bases = Vec::with_capacity(nz_count); + for (i, &val) in v.iter().enumerate() { + if val != E::Scalar::ZERO { + sparse_scalars.push(val); + sparse_bases.push(ck.ck[i]); + } + } + Commitment { + comm: E::GE::vartime_multiscalar_mul(&sparse_scalars, &sparse_bases) + + if r != &E::Scalar::ZERO { + ::group(&ck.h) * r + } else { + E::GE::zero() + }, + } + } + fn ck_derive_by_address( ck: &Self::CommitmentKey, addresses: &[usize], @@ -744,6 +817,44 @@ where Commitment { comm } } + + fn commit_sparse_binary_batch( + ck: &Self::CommitmentKey, + hot_per_poly: &[&[usize]], + r: &::Scalar, + ) -> Vec { + let comms = batch_add_multi_tree(&ck.ck, hot_per_poly); + comms + .into_iter() + .map(|comm| { + let mut comm = ::group(&comm.into()); + if r != &E::Scalar::ZERO { + comm += ::group(&ck.h) * r; + } + Commitment { comm } + }) + .collect() + } + + fn commit_address_grouped( + ck: &Self::CommitmentKey, + addrs: &[&[u16]], + num_entries: usize, + subtable_size: usize, + r: &::Scalar, + ) -> Vec { + let comms = batch_add_address_grouped(&ck.ck, addrs, num_entries, subtable_size); + comms + .into_iter() + .map(|comm| { + let mut comm = ::group(&comm.into()); + if r != &E::Scalar::ZERO { + comm += ::group(&ck.h) * r; + } + Commitment { comm } + }) + .collect() + } } /// Provides an implementation of generators for proving evaluations diff --git a/src/provider/msm.rs b/src/provider/msm.rs index 8e201d3f..5281c437 100644 --- a/src/provider/msm.rs +++ b/src/provider/msm.rs @@ -164,6 +164,45 @@ fn bucket_add_affine(bucket: &mut BucketXYZZ, p: &C) { bucket.zzz *= ppp; } +/// Subtract an affine point from an XYZZ bucket (equivalent to adding negated point). +#[inline(always)] +fn bucket_sub_affine(bucket: &mut BucketXYZZ, p: &C) { + if bool::from(p.is_identity()) { + return; + } + let coords = p.coordinates().unwrap(); + let px = *coords.x(); + let py = -*coords.y(); // Negate y to subtract + + if bucket.is_zero() { + bucket.x = px; + bucket.y = py; + bucket.zz = C::Base::ONE; + bucket.zzz = C::Base::ONE; + return; + } + let u2 = px * bucket.zz; + let s2 = py * bucket.zzz; + + if bucket.x == u2 { + if bucket.y == s2 { + bucket.double_in_place(); + } else { + *bucket = BucketXYZZ::zero(); + } + return; + } + let p_val = u2 - bucket.x; + let r = s2 - bucket.y; + let pp = p_val.square(); + let ppp = p_val * pp; + let q = bucket.x * pp; + bucket.x = r.square() - ppp - q.double(); + bucket.y = r * (q - bucket.x) - bucket.y * ppp; + bucket.zz *= pp; + bucket.zzz *= ppp; +} + /// Convert XYZZ bucket to projective curve point. /// /// Computes affine coordinates `(X/ZZ, Y/ZZZ)` then converts to projective. @@ -182,6 +221,153 @@ fn bucket_to_curve(bucket: &BucketXYZZ) -> C::CurveExt .into() } +// ================================================================================== +// Batch affine pairwise addition (Montgomery batch inversion) +// ================================================================================== + +/// Batch-invert a slice of field elements using Montgomery's trick. 
+/// After calling, `v[i]` contains `1/v[i]` (original). +/// Cost: 3(N-1) multiplies + 1 inversion. +#[inline] +fn batch_invert_in_place(v: &mut [F]) { + let n = v.len(); + if n == 0 { + return; + } + if n == 1 { + v[0] = v[0].invert().unwrap(); + return; + } + // Forward pass: compute prefix products + let mut prefix = Vec::with_capacity(n); + prefix.push(v[0]); + for i in 1..n { + prefix.push(prefix[i - 1] * v[i]); + } + // Invert total product + let mut inv = prefix[n - 1].invert().unwrap(); + // Backward pass: extract individual inverses + for i in (1..n).rev() { + let vi_inv = prefix[i - 1] * inv; + inv *= v[i]; + v[i] = vi_inv; + } + v[0] = inv; +} + +/// Sum N affine points using batch-affine tree reduction. +/// +/// Instead of sequential XYZZ additions (7M+2S each), this uses a tree-structured +/// pairwise addition with Montgomery batch inversion. Amortized cost per addition: +/// ~4M+1S+3M(batch_inv) = 7M+1S, but with better cache behavior because all +/// intermediate results stay in affine coordinates (64B vs 128B for XYZZ). +/// +/// For N points: performs N-1 additions across log₂(N) tree levels. +/// Each level batch-processes all independent pairs with a single batch inversion. +/// +/// Falls back to XYZZ accumulation for N ≤ 64 (overhead of affine tree not worthwhile). +fn batch_add_affine_tree(points: &[C]) -> C::CurveExt { + let n = points.len(); + if n == 0 { + return C::CurveExt::identity(); + } + if n == 1 { + return points[0].into(); + } + // For small N, XYZZ is faster (no batch inversion overhead) + if n <= 64 { + let mut acc = BucketXYZZ::::zero(); + for p in points { + bucket_add_affine::(&mut acc, p); + } + return bucket_to_curve::(&acc); + } + + // Work buffer: copy input points, reduce in-place + // Use Option to handle identity points cleanly + let mut buf: Vec> = points + .iter() + .map(|p| { + if bool::from(p.is_identity()) { + None + } else { + let c = p.coordinates().unwrap(); + Some((*c.x(), *c.y())) + } + }) + .collect(); + + while buf.len() > 1 { + let len = buf.len(); + let half = len / 2; + + // Compute deltas for batch inversion (exactly half-sized allocation) + let mut deltas: Vec = Vec::with_capacity(half); + for i in 0..half { + deltas.push(match (buf[2 * i], buf[2 * i + 1]) { + (Some((x1, _)), Some((x2, _))) => { + let dx = x2 - x1; + if dx == C::Base::ZERO { + C::Base::ONE + } else { + dx + } + } + _ => C::Base::ONE, + }); + } + + batch_invert_in_place::(&mut deltas); + + // Compute results for each pair + for i in 0..half { + buf[i] = match (buf[2 * i], buf[2 * i + 1]) { + (None, None) => None, + (Some(p), None) | (None, Some(p)) => Some(p), + (Some((x1, y1)), Some((x2, y2))) => { + let dx = x2 - x1; + if dx == C::Base::ZERO { + if y1 == y2 { + if y1 == C::Base::ZERO { + None + } else { + let x1_sq = x1.square(); + let lambda = (x1_sq.double() + x1_sq) * (y1.double()).invert().unwrap(); + let x3 = lambda.square() - x1.double(); + let y3 = lambda * (x1 - x3) - y1; + Some((x3, y3)) + } + } else { + None + } + } else { + let inv_dx = deltas[i]; + let lambda = (y2 - y1) * inv_dx; + let x3 = lambda.square() - x1 - x2; + let y3 = lambda * (x1 - x3) - y1; + Some((x3, y3)) + } + } + }; + } + + // Handle odd element: copy to end of reduced array + if len % 2 == 1 { + buf[half] = buf[len - 1]; + buf.truncate(half + 1); + } else { + buf.truncate(half); + } + } + + match buf[0] { + None => C::CurveExt::identity(), + Some((x, y)) => C::from_xy(x, y) + .expect("batch_add_affine_tree produced invalid point") + .into(), + } +} + // 
================================================================================== // Scalar utilities // ================================================================================== @@ -470,6 +656,9 @@ pub fn msm_small + Copy + Sync + ToPrimit scalars: &[T], bases: &[C], ) -> C::Curve { + if scalars.is_empty() { + return C::Curve::identity(); + } let max_num_bits = num_bits(scalars.iter().max().unwrap().to_usize().unwrap()); msm_small_with_max_num_bits(scalars, bases, max_num_bits) } @@ -489,7 +678,7 @@ pub fn msm_small_with_max_num_bits< 0 => C::identity().into(), 1 => msm_binary(scalars, bases), 2..=10 => msm_10(scalars, bases, max_num_bits), - 11..=32 => msm_small_rest(scalars, bases, max_num_bits), + 11..=64 => msm_small_rest(scalars, bases, max_num_bits), _ => { // For >32-bit scalars, halo2curves' msm_best is faster than our // bucket-sort Pippenger (e.g., 192ms vs 244ms at u64, 2^20 points). @@ -502,6 +691,151 @@ pub fn msm_small_with_max_num_bits< } } +/// Multi-scalar multiplication for signed (i128) scalars. +/// +/// Single-pass windowed Pippenger: handles positive and negative scalars +/// in the same bucket accumulation, avoiding the 2× MSM overhead of split pos/neg. +pub fn msm_signed(scalars: &[i128], bases: &[C]) -> C::Curve { + assert_eq!(bases.len(), scalars.len()); + + // Find max magnitude and filter out zeros. + // Callers must ensure magnitudes fit in u64 (e.g., i128 used to avoid i64 overflow). + let mut max_mag: u64 = 0; + for &s in scalars.iter() { + debug_assert!( + s.unsigned_abs() <= u64::MAX as u128, + "msm_signed: scalar magnitude exceeds u64" + ); + max_mag = max_mag.max(s.unsigned_abs() as u64); + } + + if max_mag == 0 { + return C::Curve::identity(); + } + + let bits = num_bits(max_mag as usize); + + // For small magnitudes, use the existing split approach (bucket sort is tuned for unsigned) + if bits <= 10 { + let mut pos = vec![0u64; scalars.len()]; + let mut neg = vec![0u64; scalars.len()]; + for (i, &s) in scalars.iter().enumerate() { + if s >= 0 { + pos[i] = s as u64; + } else { + neg[i] = s.unsigned_abs() as u64; + } + } + return msm_small_with_max_num_bits(&pos, bases, bits) + - msm_small_with_max_num_bits(&neg, bases, bits); + } + + // Windowed Pippenger with signed bucket accumulation + msm_signed_windowed(scalars, bases, bits) +} + +/// Single-pass signed windowed Pippenger MSM. +/// +/// For each c-bit window, positive scalars add to bucket[digit-1], +/// negative scalars subtract (using the negated affine point). +/// This avoids allocating separate pos/neg arrays and running 2× MSMs. 
+fn msm_signed_windowed( + scalars: &[i128], + bases: &[C], + max_num_bits: usize, +) -> C::Curve { + fn msm_signed_windowed_serial( + scalars: &[i128], + bases: &[C], + max_num_bits: usize, + ) -> C::Curve { + let c = if bases.len() < 32 { + 3 + } else { + // Optimal c ≈ log2(n) for Pippenger: minimizes n*(b/c) + (b/c)*2^c + let ln = compute_ln(bases.len()); + if max_num_bits <= 32 { + (ln + 2).min(max_num_bits) + } else { + // For 33-64 bit signed scalars with ~50% non-zero, + // effective n ≈ len/2, optimal c ≈ ln(n/2) + (ln + 2).clamp(8, 16) + } + }; + + let window_starts: Vec = (0..max_num_bits).step_by(c).collect(); + + let window_sums: Vec = window_starts + .iter() + .map(|&w_start| { + let mut res: BucketXYZZ = BucketXYZZ::zero(); + let mut buckets: Vec> = vec![BucketXYZZ::zero(); (1 << c) - 1]; + + for (&scalar, base) in scalars.iter().zip(bases) { + if scalar == 0 { + continue; + } + let mag = scalar.unsigned_abs() as u64; + let is_neg = scalar < 0; + + if mag == 1 { + if w_start == 0 { + if is_neg { + bucket_sub_affine::(&mut res, base); + } else { + bucket_add_affine::(&mut res, base); + } + } + } else { + let digit = (mag >> w_start) % (1 << c); + if digit != 0 { + let bucket = &mut buckets[(digit - 1) as usize]; + if is_neg { + bucket_sub_affine::(bucket, base); + } else { + bucket_add_affine::(bucket, base); + } + } + } + } + + // Prefix sum + let mut running_sum: BucketXYZZ = BucketXYZZ::zero(); + for b in buckets.into_iter().rev() { + running_sum.add_assign_bucket(&b); + res.add_assign_bucket(&running_sum); + } + bucket_to_curve::(&res) + }) + .collect(); + + let lowest = *window_sums.first().unwrap(); + lowest + + window_sums[1..] + .iter() + .rev() + .fold(C::CurveExt::identity(), |mut total, sum_i| { + total += sum_i; + for _ in 0..c { + total = total.double(); + } + total + }) + } + + let num_threads = current_num_threads(); + if scalars.len() > num_threads { + let chunk_size = scalars.len() / num_threads; + scalars + .par_chunks(chunk_size) + .zip(bases.par_chunks(chunk_size)) + .map(|(s, b)| msm_signed_windowed_serial(s, b, max_num_bits)) + .reduce(C::Curve::identity, |a, b| a + b) + } else { + msm_signed_windowed_serial(scalars, bases, max_num_bits) + } +} + fn msm_binary(scalars: &[T], bases: &[C]) -> C::Curve { assert_eq!(scalars.len(), bases.len()); let num_threads = current_num_threads(); @@ -687,10 +1021,10 @@ fn compute_ln(a: usize) -> usize { #[inline(always)] pub(crate) fn batch_add(bases: &[C], one_indices: &[usize]) -> C::Curve { - fn add_chunk(bases: impl Iterator) -> C::Curve { - let mut acc = C::Curve::identity(); - for base in bases { - acc += base; + fn add_chunk_xyzz(bases: &[C], indices: &[usize]) -> BucketXYZZ { + let mut acc = BucketXYZZ::::zero(); + for &idx in indices { + bucket_add_affine::(&mut acc, &bases[idx]); } acc } @@ -701,10 +1035,246 @@ pub(crate) fn batch_add(bases: &[C], one_indices: &[usize]) -> C let comm = one_indices .par_chunks(chunk_size) .into_par_iter() - .map(|chunk| add_chunk(chunk.iter().map(|index| bases[*index]))) - .reduce(C::Curve::identity, |sum, evl| sum + evl); + .map(|chunk| add_chunk_xyzz::(bases, chunk)) + .reduce(BucketXYZZ::zero, |mut sum, evl| { + sum.add_assign_bucket(&evl); + sum + }); - comm + bucket_to_curve::(&comm) +} + +/// Batch-add multiple sparse binary vectors over the same SRS, deduplicating shared hot +/// indices. When all polys in the group have the same hot index for a given position, the +/// SRS point is accumulated into a shared sum (once) rather than into each accumulator +/// individually. 
At the end the shared sum is merged into all accumulators. +/// +/// Uses rayon fold/reduce for parallelism: each thread processes a chunk of entries +/// with its own per-poly accumulators and shared accumulator, then merges. +pub fn batch_add_multi(bases: &[C], hot_per_poly: &[&[usize]]) -> Vec { + let n_polys = hot_per_poly.len(); + if n_polys == 0 { + return vec![]; + } + if n_polys == 1 { + return vec![batch_add(bases, hot_per_poly[0])]; + } + + let n_entries = hot_per_poly[0].len(); + debug_assert!(hot_per_poly.iter().all(|h| h.len() == n_entries)); + + // Use XYZZ bucket coordinates for 7M+2S per addition vs ~11M+5S for projective. + let (accs, shared) = (0..n_entries) + .into_par_iter() + .with_min_len(1024) + .fold( + || { + ( + vec![BucketXYZZ::::zero(); n_polys], + BucketXYZZ::::zero(), + ) + }, + |(mut accs, mut shared), t| { + let idx0 = hot_per_poly[0][t]; + let all_same = hot_per_poly[1..].iter().all(|h| h[t] == idx0); + + if all_same { + bucket_add_affine::(&mut shared, &bases[idx0]); + } else { + for (p, hot) in hot_per_poly.iter().enumerate() { + bucket_add_affine::(&mut accs[p], &bases[hot[t]]); + } + } + (accs, shared) + }, + ) + .reduce( + || { + ( + vec![BucketXYZZ::::zero(); n_polys], + BucketXYZZ::::zero(), + ) + }, + |(mut a_accs, mut a_shared), (b_accs, b_shared)| { + for (a, b) in a_accs.iter_mut().zip(b_accs.iter()) { + a.add_assign_bucket(b); + } + a_shared.add_assign_bucket(&b_shared); + (a_accs, a_shared) + }, + ); + + // Merge shared sum into every accumulator, then convert to curve points + let shared_curve = bucket_to_curve::(&shared); + accs + .iter() + .map(|acc| bucket_to_curve::(acc) + shared_curve) + .collect() +} + +/// Batch-add multiple sparse binary vectors using affine tree reduction. +/// +/// Same semantics as `batch_add_multi` but uses `batch_add_affine_tree` for each +/// poly's accumulation. This trades the per-addition cost of XYZZ (7M+2S) for +/// batch-affine (5M+1S amortized) at the cost of gathering SRS points into +/// contiguous buffers. +/// +/// Best for single-threaded or low-thread-count scenarios where the per-addition +/// cost dominates over parallelism benefits. 
+pub fn batch_add_multi_tree( + bases: &[C], + hot_per_poly: &[&[usize]], +) -> Vec { + let n_polys = hot_per_poly.len(); + if n_polys == 0 { + return vec![]; + } + let n_entries = hot_per_poly[0].len(); + if n_entries == 0 { + return vec![C::Curve::identity(); n_polys]; + } + debug_assert!(hot_per_poly.iter().all(|h| h.len() == n_entries)); + + if n_polys == 1 { + let gathered: Vec = hot_per_poly[0].iter().map(|&i| bases[i]).collect(); + return vec![batch_add_affine_tree(&gathered)]; + } + + // Phase 1: Classify entries into all_same vs different + let mut shared_indices: Vec = Vec::with_capacity(n_entries); + let mut diff_positions: Vec = Vec::with_capacity(n_entries); + + for t in 0..n_entries { + let idx0 = hot_per_poly[0][t]; + if hot_per_poly[1..].iter().all(|h| h[t] == idx0) { + shared_indices.push(idx0); + } else { + diff_positions.push(t); + } + } + + // Phase 2: Compute shared sum via affine tree + let shared_points: Vec = shared_indices.iter().map(|&i| bases[i]).collect(); + let shared_curve = batch_add_affine_tree(&shared_points); + + // Phase 3: For each poly, gather its non-shared points and tree-reduce + // Process one poly at a time to reuse the gather buffer (~4.5MB) + let mut results = Vec::with_capacity(n_polys); + let mut gather_buf: Vec = Vec::with_capacity(diff_positions.len()); + + for p in 0..n_polys { + gather_buf.clear(); + for &t in &diff_positions { + gather_buf.push(bases[hot_per_poly[p][t]]); + } + let poly_sum = batch_add_affine_tree(&gather_buf); + results.push(poly_sum + shared_curve); + } + + results +} + +/// Commit chunked RA polynomials with address-grouped accumulation. +/// +/// For `n_chunks` polynomials where SRS index = `addr * num_entries + i`, +/// groups entries by address value within each chunk for sequential SRS access, +/// then uses affine tree reduction per group. +/// +/// This exploits address locality when each chunk only hits a small number +/// of distinct SRS address blocks (e.g., `subtable_size` = 16). +pub fn batch_add_address_grouped( + bases: &[C], + addrs: &[&[u16]], + num_entries: usize, + subtable_size: usize, +) -> Vec { + let n_chunks = addrs.len(); + if n_chunks == 0 { + return vec![]; + } + assert!( + n_chunks <= 32, + "batch_add_address_grouped: n_chunks={n_chunks} exceeds u32 bitmask capacity" + ); + debug_assert!(addrs.iter().all(|a| a.len() == num_entries)); + + // Phase 1: Classify entries into shared (all chunks same addr) and per-chunk groups. + // For shared entries, group by address value. + let mut shared_by_addr: Vec> = vec![Vec::new(); subtable_size]; + let mut diff_by_chunk_addr: Vec>> = + vec![vec![Vec::new(); subtable_size]; n_chunks]; + + for t in 0..num_entries { + let a0 = addrs[0][t]; + let all_same = addrs[1..].iter().all(|a| a[t] == a0); + + if all_same { + shared_by_addr[a0 as usize].push(t); + } else { + for (c, chunk_addrs) in addrs.iter().enumerate() { + diff_by_chunk_addr[c][chunk_addrs[t] as usize].push(t); + } + } + } + + // Phase 2+3: Sequential SRS scan with per-address bitmask. + // For each SRS address block, scan t=0..T sequentially (perfect prefetch). + // Use a bitmask to identify which chunks need each entry. + // Shared entries get a separate accumulator added to all chunk results. 
+ let mut shared_acc = BucketXYZZ::::zero(); + let mut chunk_accums: Vec> = vec![BucketXYZZ::zero(); n_chunks]; + + // Build per-address bitmask: mask[t] has bit c set if chunk c maps entry t to this address + let mut mask = vec![0u32; num_entries]; + // Separate shared flag to avoid bit collision when n_chunks >= 32 + let mut is_shared = vec![false; num_entries]; + + for a in 0..subtable_size { + let base_offset = a * num_entries; + + // Fill mask for this address + for (c, diff_addrs) in diff_by_chunk_addr.iter().enumerate().take(n_chunks) { + for &t in &diff_addrs[a] { + mask[t] |= 1u32 << c; + } + } + // Mark shared entries + for &t in &shared_by_addr[a] { + is_shared[t] = true; + } + + // Sequential scan through SRS block [a*T, (a+1)*T) + for t in 0..num_entries { + if is_shared[t] { + let pt = &bases[base_offset + t]; + bucket_add_affine::(&mut shared_acc, pt); + is_shared[t] = false; + } else { + let m = mask[t]; + if m == 0 { + continue; + } + let pt = &bases[base_offset + t]; + // Per-chunk entry: add to each matching chunk + let mut bits = m; + while bits != 0 { + let c = bits.trailing_zeros() as usize; + bucket_add_affine::(&mut chunk_accums[c], pt); + bits &= bits - 1; + } + } + // Clear mask for reuse + mask[t] = 0; + } + } + + let shared_curve = bucket_to_curve::(&shared_acc); + let mut results = Vec::with_capacity(n_chunks); + for accum in chunk_accums.iter().take(n_chunks) { + results.push(bucket_to_curve::(accum) + shared_curve); + } + + results } #[cfg(test)] @@ -819,4 +1389,57 @@ mod tests { test_msm_identity_bases_with::(); test_msm_identity_bases_with::(); } + + fn test_batch_add_affine_tree_with>() { + for &n in &[0, 1, 2, 3, 5, 16, 63, 64, 65, 128, 255, 256, 500, 1000] { + let points: Vec = (0..n) + .map(|_| A::from(A::generator() * F::random(OsRng))) + .collect(); + + let expected: A::CurveExt = points + .iter() + .fold(A::CurveExt::identity(), |acc, p| acc + *p); + let got = batch_add_affine_tree(&points); + assert_eq!(expected, got, "batch_add_affine_tree mismatch at n={n}"); + } + } + + #[test] + fn test_batch_add_affine_tree() { + test_batch_add_affine_tree_with::(); + test_batch_add_affine_tree_with::(); + } + + fn test_batch_add_multi_tree_with>() { + let n_bases = 256; + let n_entries = 200; + let n_polys = 8; + + let bases: Vec = (0..n_bases) + .map(|_| A::from(A::generator() * F::random(OsRng))) + .collect(); + + let hot_vecs: Vec> = (0..n_polys) + .map(|_| { + (0..n_entries) + .map(|_| rand::random::() % n_bases) + .collect() + }) + .collect(); + let hot_refs: Vec<&[usize]> = hot_vecs.iter().map(|v| v.as_slice()).collect(); + + let expected = batch_add_multi(&bases, &hot_refs); + let got = batch_add_multi_tree(&bases, &hot_refs); + + assert_eq!(expected.len(), got.len()); + for (i, (e, g)) in expected.iter().zip(got.iter()).enumerate() { + assert_eq!(*e, *g, "batch_add_multi_tree mismatch at poly {i}"); + } + } + + #[test] + fn test_batch_add_multi_tree() { + test_batch_add_multi_tree_with::(); + test_batch_add_multi_tree_with::(); + } } diff --git a/src/provider/pasta.rs b/src/provider/pasta.rs index fcc3981a..5992d951 100644 --- a/src/provider/pasta.rs +++ b/src/provider/pasta.rs @@ -2,7 +2,7 @@ use crate::{ impl_traits, provider::{ - msm::{msm, msm_small, msm_small_with_max_num_bits}, + msm::{msm, msm_signed, msm_small, msm_small_with_max_num_bits}, traits::{DlogGroup, DlogGroupExt}, }, traits::{Group, PrimeFieldExt, TranscriptReprTrait}, diff --git a/src/provider/pedersen.rs b/src/provider/pedersen.rs index e4c7e915..d2ec68f0 100644 --- 
a/src/provider/pedersen.rs
+++ b/src/provider/pedersen.rs
@@ -1,14 +1,16 @@
 //! This module provides an implementation of a commitment engine
-use crate::provider::msm::batch_add;
 #[cfg(feature = "io")]
 use crate::provider::ptau::{read_points, write_points, PtauFileError};
-use crate::traits::evm_serde::EvmCompatSerde;
 use crate::{
   errors::NovaError,
   gadgets::utils::to_bignat_repr,
-  provider::traits::{DlogGroup, DlogGroupExt},
+  provider::{
+    msm::{batch_add, batch_add_address_grouped, batch_add_multi},
+    traits::{DlogGroup, DlogGroupExt},
+  },
   traits::{
     commitment::{CommitmentEngineTrait, CommitmentTrait, Len},
+    evm_serde::EvmCompatSerde,
     AbsorbInRO2Trait, AbsorbInROTrait, Engine, Group, ROTrait, TranscriptReprTrait,
   },
 };
@@ -326,6 +328,14 @@ where
     Commitment { comm: res }
   }
 
+  fn commit_signed(ck: &Self::CommitmentKey, v: &[i128], r: &E::Scalar) -> Self::Commitment {
+    assert!(ck.ck.len() >= v.len());
+    Commitment {
+      comm: E::GE::vartime_multiscalar_mul_signed(v, &ck.ck[..v.len()])
+        + <E::GE as DlogGroup>::group(&ck.h) * r,
+    }
+  }
+
   fn derandomize(
     dk: &Self::DerandKey,
     commit: &Self::Commitment,
@@ -428,6 +438,44 @@ where
     Commitment { comm }
   }
+
+  fn commit_sparse_binary_batch(
+    ck: &Self::CommitmentKey,
+    hot_per_poly: &[&[usize]],
+    r: &<E as Engine>::Scalar,
+  ) -> Vec<Self::Commitment> {
+    let comms = batch_add_multi(&ck.ck, hot_per_poly);
+    comms
+      .into_iter()
+      .map(|comm| {
+        let mut comm = <E::GE as DlogGroup>::group(&comm.into());
+        if r != &E::Scalar::ZERO {
+          comm += <E::GE as DlogGroup>::group(&ck.h) * r;
+        }
+        Commitment { comm }
+      })
+      .collect()
+  }
+
+  fn commit_address_grouped(
+    ck: &Self::CommitmentKey,
+    addrs: &[&[u16]],
+    num_entries: usize,
+    subtable_size: usize,
+    r: &<E as Engine>::Scalar,
+  ) -> Vec<Self::Commitment> {
+    let comms = batch_add_address_grouped(&ck.ck, addrs, num_entries, subtable_size);
+    comms
+      .into_iter()
+      .map(|comm| {
+        let mut comm = <E::GE as DlogGroup>::group(&comm.into());
+        if r != &E::Scalar::ZERO {
+          comm += <E::GE as DlogGroup>::group(&ck.h) * r;
+        }
+        Commitment { comm }
+      })
+      .collect()
+  }
 }
 
 /// A trait listing properties of a commitment key that can be managed in a divide-and-conquer fashion
diff --git a/src/provider/secp_secq.rs b/src/provider/secp_secq.rs
index d3ec21b4..b4c08361 100644
--- a/src/provider/secp_secq.rs
+++ b/src/provider/secp_secq.rs
@@ -2,7 +2,7 @@
 use crate::{
   impl_traits,
   provider::{
-    msm::{msm, msm_small, msm_small_with_max_num_bits},
+    msm::{msm, msm_signed, msm_small, msm_small_with_max_num_bits},
     traits::{DlogGroup, DlogGroupExt},
   },
   traits::{Group, PrimeFieldExt, TranscriptReprTrait},
diff --git a/src/provider/traits.rs b/src/provider/traits.rs
index 674600e6..6ea69e50 100644
--- a/src/provider/traits.rs
+++ b/src/provider/traits.rs
@@ -113,6 +113,10 @@ pub trait DlogGroupExt: DlogGroup {
       .map(|scalar| Self::vartime_multiscalar_mul_small(scalar, &bases[..scalar.len()]))
       .collect::<Vec<_>>()
   }
+
+  /// A method to compute a multiexponentiation with signed (i128) scalars whose magnitudes fit in 64 bits.
+  /// Implementations may split into positive/negative MSMs or handle signs in a single Pippenger pass.
+  fn vartime_multiscalar_mul_signed(scalars: &[i128], bases: &[Self::AffineGroupElement]) -> Self;
 }
 
 /// A trait that defines extensions to the DlogGroup trait, to be implemented for
@@ -387,6 +391,13 @@ macro_rules!
impl_traits { ) -> Self { msm_small_with_max_num_bits(scalars, bases, max_num_bits) } + + fn vartime_multiscalar_mul_signed( + scalars: &[i128], + bases: &[Self::AffineGroupElement], + ) -> Self { + msm_signed(scalars, bases) + } } }; } diff --git a/src/spartan/polys/compact.rs b/src/spartan/polys/compact.rs new file mode 100644 index 00000000..b26d7c28 --- /dev/null +++ b/src/spartan/polys/compact.rs @@ -0,0 +1,536 @@ +//! Compact multilinear polynomial: stores evaluations as small integer types +//! (bool, u8, u16, u32, u64, i64) and defers conversion to field elements. +//! +//! After the first `bind` round, coefficients are promoted to field elements. +//! The first sumcheck eval round can use integer arithmetic to avoid expensive +//! field multiplications, reducing both memory and compute. + +use crate::constants::PARALLEL_THRESHOLD; +use crate::spartan::math::Math; +use ff::PrimeField; +use rayon::prelude::*; +use serde::{Deserialize, Serialize}; + +/// Trait for small scalar types that can be stored compactly and converted to field elements. +pub trait SmallScalar: Copy + Send + Sync + Default + 'static { + /// Convert this value to a field element. + fn to_field(self) -> F; + + /// Compute `(hi - lo)` as a field element, potentially using cheaper integer arithmetic. + fn diff_to_field(lo: Self, hi: Self) -> F; + + /// Maximum number of bits needed to represent any value of this type. + /// Used by MSM to select optimal algorithm. + fn max_bits() -> u32; +} + +impl SmallScalar for bool { + #[inline(always)] + fn to_field(self) -> F { + if self { + F::ONE + } else { + F::ZERO + } + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + match (lo, hi) { + (false, false) | (true, true) => F::ZERO, + (false, true) => F::ONE, + (true, false) => F::ZERO - F::ONE, + } + } + + fn max_bits() -> u32 { + 1 + } +} + +impl SmallScalar for u8 { + #[inline(always)] + fn to_field(self) -> F { + F::from(self as u64) + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + if hi >= lo { + F::from((hi - lo) as u64) + } else { + F::ZERO - F::from((lo - hi) as u64) + } + } + + fn max_bits() -> u32 { + 8 + } +} + +impl SmallScalar for u16 { + #[inline(always)] + fn to_field(self) -> F { + F::from(self as u64) + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + if hi >= lo { + F::from((hi - lo) as u64) + } else { + F::ZERO - F::from((lo - hi) as u64) + } + } + + fn max_bits() -> u32 { + 16 + } +} + +impl SmallScalar for u32 { + #[inline(always)] + fn to_field(self) -> F { + F::from(self as u64) + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + if hi >= lo { + F::from((hi - lo) as u64) + } else { + F::ZERO - F::from((lo - hi) as u64) + } + } + + fn max_bits() -> u32 { + 32 + } +} + +impl SmallScalar for u64 { + #[inline(always)] + fn to_field(self) -> F { + F::from(self) + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + if hi >= lo { + F::from(hi - lo) + } else { + F::ZERO - F::from(lo - hi) + } + } + + fn max_bits() -> u32 { + 64 + } +} + +impl SmallScalar for i64 { + #[inline(always)] + fn to_field(self) -> F { + if self >= 0 { + F::from(self as u64) + } else { + F::ZERO - F::from(self.unsigned_abs()) + } + } + + #[inline(always)] + fn diff_to_field(lo: Self, hi: Self) -> F { + let diff = hi as i128 - lo as i128; + if diff >= 0 { + F::from(diff as u64) + } else { + F::ZERO - F::from((-diff) as u64) + } + } + + fn max_bits() -> u32 { + 64 + } +} + +/// A multilinear polynomial stored in compact form 
using small scalar type `T`. +/// +/// Coefficients are stored as `Vec` until the first bind operation, at which +/// point they are promoted to `Vec`. This saves both memory (1-8 bytes +/// per entry instead of 32) and compute (integer diff instead of field sub). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompactPolynomial { + num_vars: usize, + /// Compact coefficients (before any binding). + /// `None` after the first bind promotes them to `bound_coeffs`. + coeffs: Option>, + /// Field-element coefficients (populated after first bind, or on demand). + bound_coeffs: Vec, +} + +impl CompactPolynomial { + /// Create a new compact polynomial from small-type evaluations. + pub fn new(coeffs: Vec) -> Self { + let num_vars = coeffs.len().log_2(); + assert_eq!(coeffs.len(), 1 << num_vars); + CompactPolynomial { + num_vars, + coeffs: Some(coeffs), + bound_coeffs: Vec::new(), + } + } + + /// Returns the number of variables. + pub const fn get_num_vars(&self) -> usize { + self.num_vars + } + + /// Returns the total number of evaluations. + pub fn len(&self) -> usize { + if let Some(ref c) = self.coeffs { + c.len() + } else { + self.bound_coeffs.len() + } + } + + /// Returns true if the polynomial has no evaluations. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns true if still in compact (pre-bind) form. + pub fn is_compact(&self) -> bool { + self.coeffs.is_some() + } + + /// Access compact coefficients (panics if already promoted). + pub fn compact_coeffs(&self) -> &[T] { + self + .coeffs + .as_ref() + .expect("already promoted to field elements") + } + + /// Access field-element coefficients (panics if still compact). + pub fn field_coeffs(&self) -> &[Scalar] { + assert!(self.coeffs.is_none(), "still in compact form"); + &self.bound_coeffs + } + + /// Get coefficients as field elements, converting if necessary. + /// Does not consume or modify the polynomial. + pub fn to_field_vec(&self) -> Vec { + if let Some(ref c) = self.coeffs { + if c.len() < PARALLEL_THRESHOLD { + c.iter().map(|v| v.to_field()).collect() + } else { + c.par_iter().map(|v| v.to_field()).collect() + } + } else { + self.bound_coeffs.clone() + } + } + + /// Bind the top variable with challenge `r`. + /// + /// First call: promotes compact `Vec` → `Vec` of half size. + /// Subsequent calls: operates on `Vec` like standard MLE bind. 
+ pub fn bind_poly_var_top(&mut self, r: &Scalar) { + assert!(self.num_vars > 0); + + if let Some(coeffs) = self.coeffs.take() { + // First bind: compact → field, computing lo + r*(hi-lo) in one pass + let n = coeffs.len() / 2; + if n < PARALLEL_THRESHOLD { + self.bound_coeffs = coeffs[..n] + .iter() + .zip(coeffs[n..].iter()) + .map(|(&lo, &hi)| { + let lo_f: Scalar = lo.to_field(); + lo_f + *r * T::diff_to_field::(lo, hi) + }) + .collect(); + } else { + self.bound_coeffs = coeffs[..n] + .par_iter() + .zip(coeffs[n..].par_iter()) + .map(|(&lo, &hi)| { + let lo_f: Scalar = lo.to_field(); + lo_f + *r * T::diff_to_field::(lo, hi) + }) + .collect(); + } + } else { + // Subsequent binds: standard field-element bind + let n = self.bound_coeffs.len() / 2; + let (left, right) = self.bound_coeffs.split_at_mut(n); + + if n < PARALLEL_THRESHOLD { + left.iter_mut().zip(right.iter()).for_each(|(a, b)| { + *a += *r * (*b - *a); + }); + } else { + left + .par_iter_mut() + .zip(right.par_iter()) + .for_each(|(a, b)| { + *a += *r * (*b - *a); + }); + } + + self.bound_coeffs.truncate(n); + } + + self.num_vars -= 1; + } + + /// Evaluate at a point without binding (non-mutating). + pub fn evaluate(&self, r: &[Scalar]) -> Scalar { + assert_eq!(r.len(), self.num_vars); + if let Some(ref c) = self.coeffs { + Self::evaluate_compact(c, r) + } else { + // Already promoted, use standard evaluation + crate::spartan::polys::multilinear::MultilinearPolynomial::evaluate_with( + &self.bound_coeffs, + r, + ) + } + } + + /// Evaluate compact coefficients at a point using sqrt decomposition. + fn evaluate_compact(z: &[T], r: &[Scalar]) -> Scalar { + use crate::spartan::polys::eq::EqPolynomial; + + let s = r.len(); + let s_right = s / 2; + let s_left = s - s_right; + let n_left = 1 << s_left; + let n_right = 1 << s_right; + + let eq_left = EqPolynomial::evals_from_points(&r[..s_left]); + let eq_right = EqPolynomial::evals_from_points(&r[s_left..]); + + let reduced: Vec = (0..n_left) + .into_par_iter() + .map(|i| { + let chunk = &z[i * n_right..(i + 1) * n_right]; + chunk + .iter() + .zip(eq_right.iter()) + .map(|(v, e)| v.to_field::() * *e) + .sum() + }) + .collect(); + + reduced + .into_par_iter() + .zip(eq_left.into_par_iter()) + .map(|(r, e)| r * e) + .sum() + } + + /// Compute the quadratic evaluation points for the first sumcheck round + /// when the polynomial is still in compact form. + /// + /// Returns (eval_0, eval_2) where: + /// - eval_0 = Σ_i Z[i] * eq[i] (lower half, r=0) + /// - eval_2 = Σ_i (2*Z[n+i] - Z[i]) * (2*eq[n+i] - eq[i]) (r=2) + /// + /// This avoids field multiplications for the Z terms in compact form. 
+ pub fn evaluation_points_quadratic_with_eq(&self, eq: &[Scalar]) -> (Scalar, Scalar) { + if let Some(ref c) = self.coeffs { + let n = c.len() / 2; + assert_eq!(eq.len(), c.len()); + + if n < PARALLEL_THRESHOLD { + let (e0, e2) = c[..n] + .iter() + .zip(c[n..].iter()) + .zip(eq[..n].iter()) + .zip(eq[n..].iter()) + .fold( + (Scalar::ZERO, Scalar::ZERO), + |(mut e0, mut e2), (((&lo, &hi), &eq_lo), &eq_hi)| { + let lo_f: Scalar = lo.to_field(); + e0 += lo_f * eq_lo; + let hi_f: Scalar = hi.to_field(); + let z_2 = hi_f + hi_f - lo_f; + let eq_2 = eq_hi + eq_hi - eq_lo; + e2 += z_2 * eq_2; + (e0, e2) + }, + ); + (e0, e2) + } else { + let (e0, e2): (Scalar, Scalar) = c[..n] + .par_iter() + .zip(c[n..].par_iter()) + .zip(eq[..n].par_iter()) + .zip(eq[n..].par_iter()) + .map(|(((&lo, &hi), &eq_lo), &eq_hi)| { + let lo_f: Scalar = lo.to_field(); + let e0 = lo_f * eq_lo; + let hi_f: Scalar = hi.to_field(); + let z_2 = hi_f + hi_f - lo_f; + let eq_2 = eq_hi + eq_hi - eq_lo; + let e2 = z_2 * eq_2; + (e0, e2) + }) + .reduce( + || (Scalar::ZERO, Scalar::ZERO), + |(a0, a2), (b0, b2)| (a0 + b0, a2 + b2), + ); + (e0, e2) + } + } else { + // Already promoted — use field arithmetic + let n = self.bound_coeffs.len() / 2; + assert_eq!(eq.len(), self.bound_coeffs.len()); + let z = &self.bound_coeffs; + + if n < PARALLEL_THRESHOLD { + let (e0, e2) = z[..n] + .iter() + .zip(z[n..].iter()) + .zip(eq[..n].iter()) + .zip(eq[n..].iter()) + .fold( + (Scalar::ZERO, Scalar::ZERO), + |(mut e0, mut e2), (((&lo, &hi), &eq_lo), &eq_hi)| { + e0 += lo * eq_lo; + let z_2 = hi + hi - lo; + let eq_2 = eq_hi + eq_hi - eq_lo; + e2 += z_2 * eq_2; + (e0, e2) + }, + ); + (e0, e2) + } else { + let (e0, e2): (Scalar, Scalar) = z[..n] + .par_iter() + .zip(z[n..].par_iter()) + .zip(eq[..n].par_iter()) + .zip(eq[n..].par_iter()) + .map(|(((&lo, &hi), &eq_lo), &eq_hi)| { + let e0 = lo * eq_lo; + let z_2 = hi + hi - lo; + let eq_2 = eq_hi + eq_hi - eq_lo; + let e2 = z_2 * eq_2; + (e0, e2) + }) + .reduce( + || (Scalar::ZERO, Scalar::ZERO), + |(a0, a2), (b0, b2)| (a0 + b0, a2 + b2), + ); + (e0, e2) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::provider::pasta::pallas; + use crate::spartan::polys::multilinear::MultilinearPolynomial; + use ff::Field; + + type F = pallas::Scalar; + + #[test] + fn test_compact_bool_bind() { + let vals = vec![true, false, true, true, false, false, true, false]; + let mut compact = CompactPolynomial::::new(vals.clone()); + + let dense_vals: Vec = vals + .iter() + .map(|&b| if b { F::ONE } else { F::ZERO }) + .collect(); + let mut dense = MultilinearPolynomial::new(dense_vals); + + let r = F::from(7u64); + + compact.bind_poly_var_top(&r); + dense.bind_poly_var_top(&r); + + assert!(!compact.is_compact()); + assert_eq!(compact.bound_coeffs, dense.Z[..dense.len()].to_vec()); + } + + #[test] + fn test_compact_u64_bind() { + let vals: Vec = vec![100, 200, 300, 400]; + let mut compact = CompactPolynomial::::new(vals.clone()); + + let dense_vals: Vec = vals.iter().map(|&v| F::from(v)).collect(); + let mut dense = MultilinearPolynomial::new(dense_vals); + + let r = F::from(42u64); + + compact.bind_poly_var_top(&r); + dense.bind_poly_var_top(&r); + + assert_eq!(compact.bound_coeffs, dense.Z[..dense.len()].to_vec()); + } + + #[test] + fn test_compact_evaluate() { + let vals: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8]; + let compact = CompactPolynomial::::new(vals.clone()); + + let dense_vals: Vec = vals.iter().map(|&v| F::from(v as u64)).collect(); + let dense = MultilinearPolynomial::new(dense_vals); + + 
let point = vec![F::from(3u64), F::from(5u64), F::from(7u64)];
+
+    let eval_compact = compact.evaluate(&point);
+    let eval_dense = dense.evaluate(&point);
+
+    assert_eq!(eval_compact, eval_dense);
+  }
+
+  #[test]
+  fn test_compact_i64_negative() {
+    let vals: Vec<i64> = vec![-5, 10, -3, 7];
+    let mut compact = CompactPolynomial::::new(vals.clone());
+
+    let dense_vals: Vec<F> = vals
+      .iter()
+      .map(|&v| {
+        if v >= 0 {
+          F::from(v as u64)
+        } else {
+          F::ZERO - F::from((-v) as u64)
+        }
+      })
+      .collect();
+    let mut dense = MultilinearPolynomial::new(dense_vals);
+
+    let r = F::from(11u64);
+
+    compact.bind_poly_var_top(&r);
+    dense.bind_poly_var_top(&r);
+
+    assert_eq!(compact.bound_coeffs, dense.Z[..dense.len()].to_vec());
+  }
+
+  #[test]
+  fn test_compact_multi_bind() {
+    let vals: Vec = (0..16).collect();
+    let mut compact = CompactPolynomial::::new(vals.clone());
+
+    let dense_vals: Vec<F> = vals.iter().map(|&v| F::from(v as u64)).collect();
+    let mut dense = MultilinearPolynomial::new(dense_vals);
+
+    for i in 0..4 {
+      let r = F::from((i * 3 + 7) as u64);
+      compact.bind_poly_var_top(&r);
+      dense.bind_poly_var_top(&r);
+    }
+
+    assert_eq!(compact.bound_coeffs.len(), 1);
+    assert_eq!(compact.bound_coeffs[0], dense.Z[0]);
+  }
+}
diff --git a/src/spartan/polys/mod.rs b/src/spartan/polys/mod.rs
index 08772cfc..111cc026 100644
--- a/src/spartan/polys/mod.rs
+++ b/src/spartan/polys/mod.rs
@@ -1,5 +1,8 @@
 //! This module contains the definitions of polynomial types used in the Spartan SNARK.
 
+/// Module providing compact multilinear polynomial types.
+pub mod compact;
+
 /// Module providing the equality polynomial.
 pub mod eq;
 
diff --git a/src/traits/commitment.rs b/src/traits/commitment.rs
index e983617e..c81733af 100644
--- a/src/traits/commitment.rs
+++ b/src/traits/commitment.rs
@@ -110,6 +110,25 @@ pub trait CommitmentEngineTrait<E: Engine>: Clone + Send + Sync {
     r: &E::Scalar,
   ) -> Self::Commitment;
 
+  /// Commits to a batch of sparse binary vectors, deduplicating shared hot indices.
+  /// `hot_per_poly[p]` lists the hot (one-valued) indices of the `p`-th vector.
+  fn commit_sparse_binary_batch(
+    ck: &Self::CommitmentKey,
+    hot_per_poly: &[&[usize]],
+    r: &E::Scalar,
+  ) -> Vec<Self::Commitment>;
+
+  /// Commits to chunked RA polynomials with address-grouped accumulation.
+  /// Each polynomial's hot index is `addr * num_entries + i`, where addr ∈ 0..subtable_size.
+  /// Groups entries by address for sequential SRS access + affine tree reduction.
+  fn commit_address_grouped(
+    ck: &Self::CommitmentKey,
+    addrs: &[&[u16]],
+    num_entries: usize,
+    subtable_size: usize,
+    r: &E::Scalar,
+  ) -> Vec<Self::Commitment>;
+
   /// Commits to the provided vector of "small" scalars (at most 64 bits) using the provided generators and random blind
   fn commit_small + Copy + Sync + ToPrimitive>(
     ck: &Self::CommitmentKey,
@@ -126,6 +145,24 @@ pub trait CommitmentEngineTrait<E: Engine>: Clone + Send + Sync {
     max_num_bits: usize,
   ) -> Self::Commitment;
 
+  /// Commits to the provided vector of signed scalars (i128 values whose magnitudes fit in 64 bits) using the provided generators and random blind.
+  /// Implementations may use a single-pass signed MSM or split into positive/negative MSMs.
+  fn commit_signed(ck: &Self::CommitmentKey, v: &[i128], r: &E::Scalar) -> Self::Commitment;
+
+  /// Commits to a sparse signed vector, skipping zero entries for faster MSM.
+  /// Falls back to `commit_signed` when too many entries are non-zero for the sparse path to pay off.
+  fn commit_sparse_signed(ck: &Self::CommitmentKey, v: &[i128], r: &E::Scalar) -> Self::Commitment {
+    // Default: delegate to dense signed commit
+    Self::commit_signed(ck, v, r)
+  }
+
+  /// Commits to a sparse scalar vector, skipping zero entries for faster MSM.
+  /// Falls back to `commit` when too many entries are non-zero for the sparse path to pay off.
+  fn commit_sparse(ck: &Self::CommitmentKey, v: &[E::Scalar], r: &E::Scalar) -> Self::Commitment {
+    // Default: delegate to dense commit
+    Self::commit(ck, v, r)
+  }
+
   /// Batch commits to the provided vectors of "small" scalars (at most 64 bits) using the provided generators and random blind
   fn batch_commit_small + Copy + Sync + ToPrimitive>(
     ck: &Self::CommitmentKey,
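
Two illustrative sketches follow. They are not part of the diff; they only restate, in standalone form, what the new APIs are expected to compute. First, `msm_signed` should agree with a naive signed fold over the bases (magnitudes are assumed to fit in 64 bits, matching the function's debug assertion); the helper name `naive_signed_msm` is hypothetical and exists only for this illustration.

use group::{prime::PrimeCurveAffine, Group};

// Hypothetical reference fold used only to state msm_signed's contract:
// the sum of (+/-)|scalar_i| * base_i, with each |scalar_i| assumed to fit in u64.
fn naive_signed_msm<C: PrimeCurveAffine>(scalars: &[i128], bases: &[C]) -> C::Curve {
  scalars
    .iter()
    .zip(bases.iter())
    .fold(C::Curve::identity(), |acc, (&s, b)| {
      let term = b.to_curve() * <C::Curve as Group>::Scalar::from(s.unsigned_abs() as u64);
      if s >= 0 {
        acc + term
      } else {
        acc - term
      }
    })
}

Second, a usage sketch for the new `CompactPolynomial` in the style of the module's own tests; it assumes the `pallas::Scalar` alias those tests use, the hypothetical test name is mine, and the generic parameters are left to inference. The first `bind_poly_var_top` call is what promotes the compact `u16` coefficients to field elements.

use crate::provider::pasta::pallas;
use crate::spartan::polys::compact::CompactPolynomial;

type F = pallas::Scalar;

#[test]
fn compact_polynomial_usage_sketch() {
  // Eight u16 evaluations, i.e. a 3-variable multilinear polynomial.
  let evals: Vec<u16> = vec![3, 1, 4, 1, 5, 9, 2, 6];
  let mut poly = CompactPolynomial::new(evals);
  assert!(poly.is_compact()); // still stored as u16
  assert_eq!(poly.get_num_vars(), 3);

  // First bind: computes lo + r*(hi - lo) per pair and promotes to field elements.
  poly.bind_poly_var_top(&F::from(7u64));
  assert!(!poly.is_compact());
  assert_eq!(poly.len(), 4);
}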