From e65bd943ffea711575dcd9c9cb24cd8af3a9a4e8 Mon Sep 17 00:00:00 2001 From: Michael Kubacki Date: Mon, 30 Jun 2025 10:05:09 -0400 Subject: [PATCH] x64: Prevent modified `rdi` in PageTableArchX64.zero_page() Per Rust inline assembly docs (https://doc.rust-lang.org/reference/inline-assembly.html#operand-type), `in` states: - The allocated register will contain the value of the expression at the start of the assembly code. - The allocated register must contain the same value at the end of the assembly code (except if a lateout is allocated to the same register). This code: ```rust unsafe fn zero_page(base: VirtualAddress) { let _page: u64 = base.into(); #[cfg(all(not(test), target_arch = "x86_64"))] unsafe { asm!( "cld", // Clear the direction flag so that we increment rdi with each store "rep stosq", // Repeat the store of qword in rax to [rdi] rcx times in("rcx") 0x200, // we write 512 qwords (4096 bytes) in("rdi") _page, // start at the page in("rax") 0, // store 0 options(nostack, preserves_flags) ); } } ``` Will capture the `_page` address at the start of the assembly code into `rdi`. However, it uses `rep stosq` which is going to: - Store the `rax` value in `[rdi]` (`0`) - Repeat the quad word copy `rcx` times (`0x200`) - For each operation, `rdi` will be incremented by `8` (qword) - Going forward due to `cld` - Decrement `rcx` until it is zero At the end, `rdi` is now `_page` + (`rcx` * 8) = `_page` + `0x1000` I think it then makes sense to manually restore the `rdi` value since the code is inlined. This uses `r8` with `out` to let the compiler know the register value is modified in the assembly block. Note: The `inline(never)` attribute in the aarch64 implementation is removed as well, as that was mentioned as being added in response to the x64 issue as opposed to a specific aarch64 issue. 
Signed-off-by: Michael Kubacki --- src/aarch64/reg.rs | 3 --- src/x64.rs | 7 +++---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/aarch64/reg.rs b/src/aarch64/reg.rs index 6042f42..e131cf0 100644 --- a/src/aarch64/reg.rs +++ b/src/aarch64/reg.rs @@ -360,13 +360,10 @@ pub(crate) fn is_this_page_table_active(page_table_base: PhysicalAddress) -> boo /// 1. Ensure that the compiler does not optimize out the zeroing /// 2. Ensure that the zeroing is done as quickly as possible as without this, the zero takes a long time on /// non-optimized builds -/// 3. This function must not be inlined to ensure that the register reads and writes don't affect the caller's -/// registers. /// /// # Safety /// This function is unsafe because it operates on raw pointers. It requires the caller to ensure the VA passed in /// is mapped. -#[inline(never)] pub(crate) unsafe fn zero_page(page: u64) { // If the MMU is diabled, invalidate the cache so that any stale data does // not get later evicted to memory. diff --git a/src/x64.rs b/src/x64.rs index 67c126b..8b25636 100644 --- a/src/x64.rs +++ b/src/x64.rs @@ -89,20 +89,19 @@ impl PageTableHal for PageTableArchX64 { type PTE = X64PageTableEntry; const DEFAULT_ATTRIBUTES: MemoryAttributes = MemoryAttributes::empty(); - // This function must not be inlined to ensure that the register reads and writes don't affect the - // caller's registers. It has been viewed that this function is inlined several layers up the stack and has - // corrupted the rdi register, causing a crash. 
- #[inline(never)] unsafe fn zero_page(base: VirtualAddress) { let _page: u64 = base.into(); #[cfg(all(not(test), target_arch = "x86_64"))] unsafe { asm!( + "mov r8, rdi", // r8 will hold the original address of the page "cld", // Clear the direction flag so that we increment rdi with each store "rep stosq", // Repeat the store of qword in rax to [rdi] rcx times + "mov rdi, r8", // Restore the original address of the page in("rcx") 0x200, // we write 512 qwords (4096 bytes) in("rdi") _page, // start at the page in("rax") 0, // store 0 + out("r8") _, // r8 is used to hold the original address of the page options(nostack) ); }