Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cts_runner/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ unittests:*

webgpu:api,operation,adapter,requestAdapter:*
webgpu:api,operation,buffers,createBindGroup:buffer_binding_resource:*
webgpu:api,operation,buffers,map:mapAsync,read,*
webgpu:api,operation,buffers,map:mapAsync,write,*
webgpu:api,operation,command_buffer,basic:*
webgpu:api,operation,command_buffer,copyBufferToBuffer:*
fails-if(vulkan) webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:format="depth16unorm"
Expand Down
43 changes: 43 additions & 0 deletions tests/tests/wgpu-gpu/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub fn all_tests(vec: &mut Vec<GpuTestInitializer>) {
vec.extend([
EMPTY_BUFFER,
MAP_OFFSET,
MAP_WITHOUT_SUBMIT,
MINIMUM_BUFFER_BINDING_SIZE_LAYOUT,
MINIMUM_BUFFER_BINDING_SIZE_DISPATCH,
CLEAR_OFFSET_OUTSIDE_RESOURCE_BOUNDS,
Expand Down Expand Up @@ -184,6 +185,48 @@ static MAP_OFFSET: GpuTestConfiguration = GpuTestConfiguration::new()
}
});

/// Mapping a buffer should see data previously written to the buffer, even if there was no
/// intervening submit.
///
/// The buffer is created with `mapped_at_creation: true`, filled through the creation
/// mapping, unmapped, and then immediately mapped again for reading via `map_async` —
/// with no queue submission in between. The read-back contents must match what was
/// written.
///
/// Regression test for [#5173](https://github.com/gfx-rs/wgpu/issues/5173).
#[gpu_test]
static MAP_WITHOUT_SUBMIT: GpuTestConfiguration =
GpuTestConfiguration::new().run_async(|ctx| async move {
// 12-byte buffer: writable at creation, readable via mapAsync afterwards.
let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: 12,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: true,
});

// Fill the buffer through the creation mapping with bytes 0..=11.
{
let data = (0..12).map(|i| (i % 255) as u8).collect::<Vec<u8>>();
let mut mapped = buffer.slice(0..12).get_mapped_range_mut();
assert!(mapped.len() == 12);
mapped.copy_from_slice(&data);
}

buffer.unmap();

// Request a read mapping with no intervening queue submission; the
// callback simply asserts the map operation succeeded.
buffer
.slice(0..12)
.map_async(wgpu::MapMode::Read, Result::unwrap);

// Block until the map request above has completed.
ctx.async_poll(wgpu::PollType::wait_indefinitely())
.await
.unwrap();

// The new mapping must observe the bytes written before `unmap` above.
{
let mapped = buffer.slice(0..12).get_mapped_range();
assert!(mapped.len() == 12);
for (i, elt) in mapped.iter().enumerate() {
assert_eq!(*elt, (i % 255) as u8);
}
}

buffer.unmap();
});

/// The WebGPU algorithm [validating shader binding][vsb] requires
/// implementations to check that buffer bindings are large enough to
/// hold the WGSL `storage` or `uniform` variables they're bound to.
Expand Down
9 changes: 7 additions & 2 deletions wgpu-core/src/device/life.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,15 @@ impl LifetimeTracker {
}

/// Start tracking resources associated with a new queue submission.
pub fn track_submission(&mut self, index: SubmissionIndex, encoders: Vec<EncoderInFlight>) {
pub fn track_submission(
&mut self,
index: SubmissionIndex,
encoders: Vec<EncoderInFlight>,
map_buffer: Option<Arc<Buffer>>,
) {
self.active.push(ActiveSubmission {
index,
mapped: Vec::new(),
mapped: map_buffer.into_iter().collect(),
compact_read_back: Vec::new(),
encoders,
work_done_closures: SmallVec::new(),
Expand Down
75 changes: 65 additions & 10 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,13 @@ pub(crate) struct EncoderInFlight {
///
/// Instead, `Device::pending_writes` owns one of these values, which
/// has its own hal command encoder and resource lists. The commands
/// accumulated here are automatically submitted to the queue the next
/// time the user submits a wgpu command buffer, ahead of the user's
/// commands.
/// accumulated here are automatically submitted to the queue at the
/// sooner of:
///
/// 1. The user's next wgpu command buffer submission. (Pending writes
/// are inserted ahead of the user's commands.)
/// 2. The next `mapAsync` request for a buffer that has pending
/// writes.
///
/// Important:
/// When locking pending_writes be sure that tracker is not locked
Expand Down Expand Up @@ -487,8 +491,6 @@ pub enum QueueSubmitError {
Queue(#[from] DeviceError),
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error(transparent)]
Unmap(#[from] BufferAccessError),
#[error("{0} is still mapped")]
BufferStillMapped(ResourceErrorIdent),
#[error(transparent)]
Expand All @@ -503,7 +505,6 @@ impl WebGpuError for QueueSubmitError {
fn webgpu_error_type(&self) -> ErrorType {
match self {
Self::Queue(e) => e.webgpu_error_type(),
Self::Unmap(e) => e.webgpu_error_type(),
Self::CommandEncoder(e) => e.webgpu_error_type(),
Self::ValidateAsActionsError(e) => e.webgpu_error_type(),
Self::InvalidResource(e) => e.webgpu_error_type(),
Expand Down Expand Up @@ -1159,6 +1160,40 @@ impl Queue {
Ok(())
}

/// Flush `self.pending_writes` (by calling `Queue::submit`) if it contains writes for `buffer`.
pub fn flush_writes_for_buffer(
&self,
buffer: &Arc<Buffer>,
snatch_guard: SnatchGuard,
) -> Result<Option<SubmissionIndex>, BufferAccessError> {
use QueueSubmitError as E;
{
let pending_writes = self.pending_writes.lock();
if !pending_writes.contains_buffer(buffer) {
return Ok(None);
}
}

let res = self.submit_inner(&[], Some(buffer.clone()), snatch_guard);
match res {
Ok(submission_index) => Ok(Some(submission_index)),
Err((_, E::Queue(e))) => Err(e.into()),
Err((_, E::DestroyedResource(e))) => Err(e.into()),
Err((_, E::InvalidResource(e))) => Err(e.into()),
Err((
_,
e @ E::BufferStillMapped(_)
| e @ E::CommandEncoder(_)
| e @ E::ValidateAsActionsError(_),
)) => {
// These errors are not expected. Encode them as a string, because
// some of them are not serializable, while `BufferAccessError` is.
log::debug!("Unexpected error flushing writes for buffer: {e}");
Err(BufferAccessError::QueueSubmit(e.to_string()))
Comment on lines +1189 to +1192
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should either panic here or add a new submit function to make it impossible for these variants to be returned.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactoring the submit function to be used by a new submit function that doesn't return these variants seems a lot more tricky.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's possible to take out this block and call it from map_async.

let mut pending_writes = self.pending_writes.lock();
{
used_surface_textures.set_size(self.device.tracker_indices.textures.size());
for texture in pending_writes.dst_textures.values() {
match texture.try_inner(&snatch_guard) {
Ok(TextureInner::Native { .. }) => {}
Ok(TextureInner::Surface { .. }) => {
// Compare the Arcs by pointer as Textures don't implement Eq
submit_surface_textures_owned
.insert(Arc::as_ptr(texture), texture.clone());
unsafe {
used_surface_textures
.merge_single(texture, None, wgt::TextureUses::PRESENT)
.unwrap()
};
}
// The texture must not have been destroyed when its usage here was
// encoded. If it was destroyed after that, then it was transferred
// to `pending_writes.temp_resources` at the time of destruction, so
// we are still okay to use it.
Err(DestroyedResourceError(_)) => {}
}
}
if !used_surface_textures.is_empty() {
let mut trackers = self.device.trackers.lock();
let texture_barriers = trackers
.textures
.set_from_usage_scope_and_drain_transitions(
&used_surface_textures,
&snatch_guard,
)
.collect::<Vec<_>>();
unsafe {
pending_writes
.command_encoder
.transition_textures(&texture_barriers);
};
}
}
match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) {
Ok(Some(pending_execution)) => {
active_executions.insert(0, pending_execution);
}
Ok(None) => {}
Err(e) => break 'error Err(e.into()),
}
let hal_command_buffers = active_executions
.iter()
.flat_map(|e| e.inner.list.iter().map(|b| b.as_ref()))
.collect::<Vec<_>>();
{
let mut submit_surface_textures =
SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(
submit_surface_textures_owned.len(),
);
for texture in submit_surface_textures_owned.values() {
let raw = match texture.inner.get(&snatch_guard) {
Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
_ => unreachable!(),
};
submit_surface_textures.push(raw);
}
if let Err(e) = unsafe {
self.raw().submit(
&hal_command_buffers,
&submit_surface_textures,
(fence.as_mut(), submit_index),
)
}
.map_err(|e| self.device.handle_hal_error(e))
{
break 'error Err(e.into());
}
drop(command_index_guard);
// Advance the successful submission index.
self.device
.last_successful_submission_index
.fetch_max(submit_index, Ordering::SeqCst);
}
profiling::scope!("cleanup");
// this will register the new submission to the life time tracker
self.lock_life()
.track_submission(submit_index, active_executions);
drop(pending_writes);

Below it we call device.maintain to clean up resources from previous submissions, but I don't think we should do that in map_async.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A heads-up: @atlv24 has pulled this out in #9361.

}
}
}

#[cfg(feature = "trace")]
fn trace_submission(
&self,
Expand Down Expand Up @@ -1186,18 +1221,36 @@ impl Queue {
}
}

/// Submit command buffers to the queue.
///
/// Thin public wrapper around `submit_inner`: it acquires the device's
/// snatchable lock and performs a normal submission with no buffer
/// scheduled for mapping afterwards (`map_buffer` is `None`).
pub fn submit(
&self,
command_buffers: &[Arc<CommandBuffer>],
) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
profiling::scope!("Queue::submit");
api_log!("Queue::submit");

// The snatch lock is taken here, before the fence lock acquired inside
// `submit_inner`, to preserve the lock ordering that avoids deadlocks.
let snatch_guard = self.device.snatchable_lock.read();
self.submit_inner(command_buffers, None, snatch_guard)
}

/// Perform a queue submission
///
/// This is used internally for `Queue::submit` and for `Buffer::map_async` when it
/// needs to flush pending writes.
///
/// In the latter case, `command_buffers` is `&[]` (that is not a requirement, but
/// the error handling path for `map_async` doesn't expect command buffer errors),
/// and `map_buffer` is the buffer being mapped, which will be scheduled for
/// mapping after the submission completes.
fn submit_inner(
&self,
command_buffers: &[Arc<CommandBuffer>],
map_buffer: Option<Arc<Buffer>>,
snatch_guard: SnatchGuard,
) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
let submit_index;

let res = 'error: {
let snatch_guard = self.device.snatchable_lock.read();

// Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
let mut fence = self.device.fence.write();

Expand Down Expand Up @@ -1464,13 +1517,15 @@ impl Queue {

profiling::scope!("cleanup");

// this will register the new submission to the life time tracker
// This will register the new submission to the life time tracker,
// including scheduling mapping of `map_buffer`, if applicable.
self.lock_life()
.track_submission(submit_index, active_executions);
.track_submission(submit_index, active_executions, map_buffer);
drop(pending_writes);

// This will schedule destruction of all resources that are no longer needed
// by the user but used in the command stream, among other things.
// `device.maintain` consumes and will release the snatch guard.
let fence_guard = RwLockWriteGuard::downgrade(fence);
let (closures, result) =
self.device
Expand Down
78 changes: 52 additions & 26 deletions wgpu-core/src/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ pub enum BufferAccessError {
Failed,
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error("An error occurred while flushing pending writes to the buffer: {0}")]
QueueSubmit(String),
#[error("Buffer is already mapped")]
AlreadyMapped,
#[error("Buffer map is pending")]
Expand Down Expand Up @@ -329,6 +331,8 @@ impl WebGpuError for BufferAccessError {
Self::InvalidResource(e) => e.webgpu_error_type(),
Self::DestroyedResource(e) => e.webgpu_error_type(),

Self::QueueSubmit(_) => ErrorType::Internal,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spec allows the internal variant to only be returned by pipeline creation.


Self::Failed
| Self::AlreadyMapped
| Self::MapAlreadyPending
Expand Down Expand Up @@ -654,50 +658,72 @@ impl Buffer {
return Err((op, e.into()));
}

{
// `submit_index` will be set to:
// - `Some(index)`, if there is a submission the mapping operation must wait for.
// - `Some(0)`, if we have a queue and there is no submission to wait for.
// - `None`, if we don't have a queue.
let submit_index = {
let snatch_guard = device.snatchable_lock.read();
if let Err(e) = self.check_destroyed(&snatch_guard) {
return Err((op, e.into()));
}
}

{
let map_state = &mut *self.map_state.lock();
*map_state = match *map_state {
BufferMapState::Init { .. } | BufferMapState::Active { .. } => {
return Err((op, BufferAccessError::AlreadyMapped));
}
BufferMapState::Waiting(_) => {
return Err((op, BufferAccessError::MapAlreadyPending));
}
BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping {
range: offset..end_offset,
op,
_parent_buffer: self.clone(),
}),
};
}
// Review note: the code previously dropped the snatch lock here. I don't see how
// that was correct, if we drop it then the buffer could be destroyed.
Comment on lines +671 to +672
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it was OK because we were not using the raw hal buffer for any operation, but we were changing state on the core buffer, so maybe we could have gotten into an unexpected situation.


{
let map_state = &mut *self.map_state.lock();
*map_state = match *map_state {
BufferMapState::Init { .. } | BufferMapState::Active { .. } => {
return Err((op, BufferAccessError::AlreadyMapped));
}
BufferMapState::Waiting(_) => {
return Err((op, BufferAccessError::MapAlreadyPending));
}
BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping {
range: offset..end_offset,
op,
_parent_buffer: self.clone(),
}),
};
}

if let Some(queue) = device.get_queue().as_ref() {
Some(match queue.flush_writes_for_buffer(self, snatch_guard) {
Err(err) => {
let state = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle);
let BufferMapState::Waiting(BufferPendingMapping { op, .. }) = state else {
unreachable!();
};
return Err((op, err));
}
Ok(Some(submit_index)) => submit_index,
Ok(None) => queue.lock_life().map(self).unwrap_or(0),
})
} else {
None
}
};

// TODO: we are ignoring the transition here, I think we need to add a barrier
// at the end of the submission
// TODO(https://github.com/gfx-rs/wgpu/issues/9306): we are ignoring the transition
// here, I think we need to add a barrier at the end of the submission
device
.trackers
.lock()
.buffers
.set_single(self, internal_use);

let submit_index = if let Some(queue) = device.get_queue() {
queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary
if let Some(index) = submit_index {
Ok(index)
} else {
// We don't have a queue, so go ahead and map the buffer.
// We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`.
let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap();
if let Some(callback) = operation.callback.take() {
callback(status);
}
0
};

Ok(submit_index)
Ok(0)
}
}

pub fn get_mapped_range(
Expand Down