From b266de4ec209848cdc98a539dd3dfffa110a4936 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 2 Mar 2026 01:38:44 +0900 Subject: [PATCH] perf: document zero-initialized scratch buffers in Rust fallback functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the Rust fallback functions (those ending in _rust), scratch buffers like `let mut mid = [[0i16; MID_STRIDE]; 135]` are zero-initialized but don't need to be — the C version (dav1d's mc_tmpl.c, looprestoration_tmpl.c, itx_tmpl.c) declares these as uninitialized stack variables. The MaybeUninit pattern was already applied successfully in cdef.rs (PR #1397) where buffers are passed to ASM calls. However, in mc.rs, looprestoration.rs, and itx.rs, the Rust fallback buffers are used with safe Rust indexing, making MaybeUninit more invasive for limited benefit since these functions are only used when ASM is unavailable. No buffers were found inside loops that could be hoisted out. This commit adds TODO(perf) comments documenting each buffer that matches an uninitialized C counterpart, following the pattern established in PR #1397 and #1399. Files annotated: - src/mc.rs: 10 buffers across put_8tap, prep_8tap, put_bilin, prep_bilin (regular and scaled variants), and warp_affine functions - src/looprestoration.rs: 5 function-level annotations covering wiener_rust, selfguided_filter, sgr_5x5_rust, sgr_3x3_rust, sgr_mix_rust - src/itx.rs: 2 buffers in inv_txfm_add and inv_txfm_add_wht_wht_4x4 --- src/itx.rs | 8 ++++++++ src/looprestoration.rs | 20 ++++++++++++++++++++ src/mc.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/src/itx.rs b/src/itx.rs index a2646d917..9fa2acf8f 100644 --- a/src/itx.rs +++ b/src/itx.rs @@ -95,6 +95,10 @@ fn inv_txfm_add( let row_clip_max = !row_clip_min; let col_clip_max = !col_clip_min; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's itx_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0; 64 * 64]; let mut c = &mut tmp[..]; for y in 0..sh { @@ -304,6 +308,10 @@ fn inv_txfm_add_wht_wht_4x4_rust( let coeff = &mut coeff[..W * H]; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's itx_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0; W * H]; let mut c = &mut tmp[..]; for y in 0..H { diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 4d024243b..6d5d7ca30 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -391,6 +391,10 @@ fn wiener_rust( ) { // Wiener filtering is applied to a maximum stripe height of 64 + 3 pixels // of padding above and below + // TODO(perf): These buffers are zero-initialized but only need to be written + // before read, matching the uninitialized C counterparts in dav1d's looprestoration_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0.into(); (64 + 3 + 3) * REST_UNIT_STRIDE]; padding::(&mut tmp, p, left, lpf, lpf_off, w, h, edges); @@ -650,6 +654,10 @@ fn selfguided_filter( // Selfguided filter is applied to a maximum stripe height of 64 + 3 pixels // of padding above and below + // TODO(perf): These buffers are zero-initialized but only need to be written + // before read, matching the uninitialized C counterparts in dav1d's looprestoration_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut sumsq = [0; (64 + 2 + 2) * REST_UNIT_STRIDE]; // By inverting `a` and `b` after the boxsums, `b` can be of `BD::Coef` instead of `i32`. let mut sum = [0.as_::(); (64 + 2 + 2) * REST_UNIT_STRIDE]; @@ -803,6 +811,10 @@ fn sgr_5x5_rust( ) { // Selfguided filter is applied to a maximum stripe height of 64 + 3 pixels // of padding above and below + // TODO(perf): These buffers are zero-initialized but only need to be written + // before read, matching the uninitialized C counterparts in dav1d's looprestoration_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0.as_(); (64 + 3 + 3) * REST_UNIT_STRIDE]; // Selfguided filter outputs to a maximum stripe height of 64 and a @@ -867,6 +879,10 @@ fn sgr_3x3_rust( edges: LrEdgeFlags, bd: BD, ) { + // TODO(perf): These buffers are zero-initialized but only need to be written + // before read, matching the uninitialized C counterparts in dav1d's looprestoration_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0.as_(); (64 + 3 + 3) * REST_UNIT_STRIDE]; let mut dst = [0.as_(); 64 * 384]; @@ -928,6 +944,10 @@ fn sgr_mix_rust( edges: LrEdgeFlags, bd: BD, ) { + // TODO(perf): These buffers are zero-initialized but only need to be written + // before read, matching the uninitialized C counterparts in dav1d's looprestoration_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut tmp = [0.as_(); (64 + 3 + 3) * REST_UNIT_STRIDE]; let mut dst0 = [0.as_(); 64 * 384]; let mut dst1 = [0.as_(); 64 * 384]; diff --git a/src/mc.rs b/src/mc.rs index a9580aabc..c8e630ae0 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -159,6 +159,10 @@ fn put_8tap_rust( if let Some(fh) = fh { if let Some(fv) = fv { let tmp_h = h + 7; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 135]; // Default::default() for y in 0..tmp_h { @@ -223,6 +227,10 @@ fn put_8tap_scaled_rust( let intermediate_bits = bd.get_intermediate_bits(); let intermediate_rnd = (1 << intermediate_bits) >> 1; let tmp_h = ((h - 1) * dy + my >> 10) + 8; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 256 + 7]; // Default::default() for y in 0..tmp_h { @@ -284,6 +292,10 @@ fn prep_8tap_rust( if let Some(fh) = fh { if let Some(fv) = fv { let tmp_h = h + 7; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 135]; // Default::default() for y in 0..tmp_h { @@ -344,6 +356,10 @@ fn prep_8tap_scaled_rust( ) { let intermediate_bits = bd.get_intermediate_bits(); let tmp_h = ((h - 1) * dy + my >> 10) + 8; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 256 + 7]; // Default::default() for y in 0..tmp_h { @@ -417,6 +433,10 @@ fn put_bilin_rust( if mx != 0 { if my != 0 { + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 129]; // Default::default() let tmp_h = h + 1; @@ -479,6 +499,10 @@ fn put_bilin_scaled_rust( ) { let intermediate_bits = bd.get_intermediate_bits(); let tmp_h = ((h - 1) * dy + my >> 10) + 2; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 256 + 1]; for y in 0..tmp_h { @@ -523,6 +547,10 @@ fn prep_bilin_rust( let intermediate_bits = bd.get_intermediate_bits(); if mx != 0 { if my != 0 { + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 129]; let tmp_h = h + 1; @@ -581,6 +609,10 @@ fn prep_bilin_scaled_rust( ) { let intermediate_bits = bd.get_intermediate_bits(); let tmp_h = ((h - 1) * dy + my >> 10) + 2; + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0i16; MID_STRIDE]; 256 + 1]; for y in 0..tmp_h { @@ -828,6 +860,10 @@ fn warp_affine_8x8_rust( const H: usize = 15; let intermediate_bits = bd.get_intermediate_bits(); + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0; W]; H]; for y in 0..H { @@ -879,6 +915,10 @@ fn warp_affine_8x8t_rust( const H: usize = 15; let intermediate_bits = bd.get_intermediate_bits(); + // TODO(perf): This buffer is zero-initialized but only needs to be written + // before read, matching the uninitialized C counterpart in dav1d's mc_tmpl.c. + // Could use MaybeUninit to avoid zeroing, but this is a Rust fallback function + // (only used when ASM is unavailable). See PR #1397 and #1399. let mut mid = [[0; W]; H]; for y in 0..H {