Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cts_runner/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ unittests:*

webgpu:api,operation,adapter,requestAdapter:*
webgpu:api,operation,buffers,createBindGroup:buffer_binding_resource:*
webgpu:api,operation,buffers,map:mapAsync,read,*
webgpu:api,operation,buffers,map:mapAsync,write,*
webgpu:api,operation,command_buffer,basic:*
webgpu:api,operation,command_buffer,copyBufferToBuffer:*
fails-if(vulkan) webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:format="depth16unorm"
Expand Down
43 changes: 43 additions & 0 deletions tests/tests/wgpu-gpu/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub fn all_tests(vec: &mut Vec<GpuTestInitializer>) {
vec.extend([
EMPTY_BUFFER,
MAP_OFFSET,
MAP_WITHOUT_SUBMIT,
MINIMUM_BUFFER_BINDING_SIZE_LAYOUT,
MINIMUM_BUFFER_BINDING_SIZE_DISPATCH,
CLEAR_OFFSET_OUTSIDE_RESOURCE_BOUNDS,
Expand Down Expand Up @@ -184,6 +185,48 @@ static MAP_OFFSET: GpuTestConfiguration = GpuTestConfiguration::new()
}
});

/// Mapping a buffer should see data previously written to the buffer, even if there was no
/// intervening submit.
///
/// The buffer is created with `mapped_at_creation: true`, filled through the creation
/// mapping, unmapped, and then immediately mapped again for reading via `map_async` —
/// with no queue submission in between. The read-back contents must match what was
/// written.
///
/// Regression test for [#5173](https://github.com/gfx-rs/wgpu/issues/5173).
#[gpu_test]
static MAP_WITHOUT_SUBMIT: GpuTestConfiguration =
GpuTestConfiguration::new().run_async(|ctx| async move {
// 12-byte buffer: writable at creation, readable via mapAsync afterwards.
let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: 12,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: true,
});

// Fill the buffer through the creation mapping with bytes 0..=11.
{
let data = (0..12).map(|i| (i % 255) as u8).collect::<Vec<u8>>();
let mut mapped = buffer.slice(0..12).get_mapped_range_mut();
assert!(mapped.len() == 12);
mapped.copy_from_slice(&data);
}

buffer.unmap();

// Request a read mapping with no intervening queue submission; the
// callback simply asserts the map operation succeeded.
buffer
.slice(0..12)
.map_async(wgpu::MapMode::Read, Result::unwrap);

// Block until the map request above has completed.
ctx.async_poll(wgpu::PollType::wait_indefinitely())
.await
.unwrap();

// The new mapping must observe the bytes written before `unmap` above.
{
let mapped = buffer.slice(0..12).get_mapped_range();
assert!(mapped.len() == 12);
for (i, elt) in mapped.iter().enumerate() {
assert_eq!(*elt, (i % 255) as u8);
}
}

buffer.unmap();
});

/// The WebGPU algorithm [validating shader binding][vsb] requires
/// implementations to check that buffer bindings are large enough to
/// hold the WGSL `storage` or `uniform` variables they're bound to.
Expand Down
9 changes: 7 additions & 2 deletions wgpu-core/src/device/life.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,15 @@ impl LifetimeTracker {
}

/// Start tracking resources associated with a new queue submission.
pub fn track_submission(&mut self, index: SubmissionIndex, encoders: Vec<EncoderInFlight>) {
pub fn track_submission(
&mut self,
index: SubmissionIndex,
encoders: Vec<EncoderInFlight>,
map_buffer: Option<Arc<Buffer>>,
) {
self.active.push(ActiveSubmission {
index,
mapped: Vec::new(),
mapped: map_buffer.into_iter().collect(),
compact_read_back: Vec::new(),
encoders,
work_done_closures: SmallVec::new(),
Expand Down
75 changes: 65 additions & 10 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,13 @@ pub(crate) struct EncoderInFlight {
///
/// Instead, `Device::pending_writes` owns one of these values, which
/// has its own hal command encoder and resource lists. The commands
/// accumulated here are automatically submitted to the queue the next
/// time the user submits a wgpu command buffer, ahead of the user's
/// commands.
/// accumulated here are automatically submitted to the queue at the
/// sooner of:
///
/// 1. The user's next wgpu command buffer submission. (Pending writes
/// are inserted ahead of the user's commands.)
/// 2. The next `mapAsync` request for a buffer that has pending
/// writes.
///
/// Important:
/// When locking pending_writes be sure that tracker is not locked
Expand Down Expand Up @@ -487,8 +491,6 @@ pub enum QueueSubmitError {
Queue(#[from] DeviceError),
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error(transparent)]
Unmap(#[from] BufferAccessError),
#[error("{0} is still mapped")]
BufferStillMapped(ResourceErrorIdent),
#[error(transparent)]
Expand All @@ -503,7 +505,6 @@ impl WebGpuError for QueueSubmitError {
fn webgpu_error_type(&self) -> ErrorType {
match self {
Self::Queue(e) => e.webgpu_error_type(),
Self::Unmap(e) => e.webgpu_error_type(),
Self::CommandEncoder(e) => e.webgpu_error_type(),
Self::ValidateAsActionsError(e) => e.webgpu_error_type(),
Self::InvalidResource(e) => e.webgpu_error_type(),
Expand Down Expand Up @@ -1159,6 +1160,40 @@ impl Queue {
Ok(())
}

/// Flush `self.pending_writes` (by calling `Queue::submit`) if it contains writes for `buffer`.
pub fn flush_writes_for_buffer(
&self,
buffer: &Arc<Buffer>,
snatch_guard: SnatchGuard,
) -> Result<Option<SubmissionIndex>, BufferAccessError> {
use QueueSubmitError as E;
{
let pending_writes = self.pending_writes.lock();
if !pending_writes.contains_buffer(buffer) {
return Ok(None);
}
}

let res = self.submit_inner(&[], Some(buffer.clone()), snatch_guard);
match res {
Ok(submission_index) => Ok(Some(submission_index)),
Err((_, E::Queue(e))) => Err(e.into()),
Err((_, E::DestroyedResource(e))) => Err(e.into()),
Err((_, E::InvalidResource(e))) => Err(e.into()),
Err((
_,
e @ E::BufferStillMapped(_)
| e @ E::CommandEncoder(_)
| e @ E::ValidateAsActionsError(_),
)) => {
// These errors are not expected. Encode them as a string, because
// some of them are not serializable, while `BufferAccessError` is.
log::debug!("Unexpected error flushing writes for buffer: {e}");
Err(BufferAccessError::QueueSubmit(e.to_string()))
Comment on lines +1189 to +1192
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should either panic here or add a new submit function to make it impossible for these variants to be returned.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactoring the submit function to be used by a new submit function that doesn't return these variants seems a lot more tricky.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's possible to take out this block and call it from map_async.

let mut pending_writes = self.pending_writes.lock();
{
used_surface_textures.set_size(self.device.tracker_indices.textures.size());
for texture in pending_writes.dst_textures.values() {
match texture.try_inner(&snatch_guard) {
Ok(TextureInner::Native { .. }) => {}
Ok(TextureInner::Surface { .. }) => {
// Compare the Arcs by pointer as Textures don't implement Eq
submit_surface_textures_owned
.insert(Arc::as_ptr(texture), texture.clone());
unsafe {
used_surface_textures
.merge_single(texture, None, wgt::TextureUses::PRESENT)
.unwrap()
};
}
// The texture must not have been destroyed when its usage here was
// encoded. If it was destroyed after that, then it was transferred
// to `pending_writes.temp_resources` at the time of destruction, so
// we are still okay to use it.
Err(DestroyedResourceError(_)) => {}
}
}
if !used_surface_textures.is_empty() {
let mut trackers = self.device.trackers.lock();
let texture_barriers = trackers
.textures
.set_from_usage_scope_and_drain_transitions(
&used_surface_textures,
&snatch_guard,
)
.collect::<Vec<_>>();
unsafe {
pending_writes
.command_encoder
.transition_textures(&texture_barriers);
};
}
}
match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) {
Ok(Some(pending_execution)) => {
active_executions.insert(0, pending_execution);
}
Ok(None) => {}
Err(e) => break 'error Err(e.into()),
}
let hal_command_buffers = active_executions
.iter()
.flat_map(|e| e.inner.list.iter().map(|b| b.as_ref()))
.collect::<Vec<_>>();
{
let mut submit_surface_textures =
SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(
submit_surface_textures_owned.len(),
);
for texture in submit_surface_textures_owned.values() {
let raw = match texture.inner.get(&snatch_guard) {
Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
_ => unreachable!(),
};
submit_surface_textures.push(raw);
}
if let Err(e) = unsafe {
self.raw().submit(
&hal_command_buffers,
&submit_surface_textures,
(fence.as_mut(), submit_index),
)
}
.map_err(|e| self.device.handle_hal_error(e))
{
break 'error Err(e.into());
}
drop(command_index_guard);
// Advance the successful submission index.
self.device
.last_successful_submission_index
.fetch_max(submit_index, Ordering::SeqCst);
}
profiling::scope!("cleanup");
// this will register the new submission to the life time tracker
self.lock_life()
.track_submission(submit_index, active_executions);
drop(pending_writes);

Below it we call device.maintain to clean up resources from previous submissions, but I don't think we should do that in map_async.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A heads-up: @atlv24 has pulled this out in #9361.

}
}
}

#[cfg(feature = "trace")]
fn trace_submission(
&self,
Expand Down Expand Up @@ -1186,18 +1221,36 @@ impl Queue {
}
}

/// Submit command buffers to the queue.
///
/// Thin public wrapper around `submit_inner`: it acquires the device's
/// snatchable lock and performs a normal submission with no buffer
/// scheduled for mapping afterwards (`map_buffer` is `None`).
pub fn submit(
&self,
command_buffers: &[Arc<CommandBuffer>],
) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
profiling::scope!("Queue::submit");
api_log!("Queue::submit");

// The snatch lock is taken here, before the fence lock acquired inside
// `submit_inner`, to preserve the lock ordering that avoids deadlocks.
let snatch_guard = self.device.snatchable_lock.read();
self.submit_inner(command_buffers, None, snatch_guard)
}

/// Perform a queue submission
///
/// This is used internally for `Queue::submit` and for `Buffer::map_async` when it
/// needs to flush pending writes.
///
/// In the latter case, `command_buffers` is `&[]` (that is not a requirement, but
/// the error handling path for `map_async` doesn't expect command buffer errors),
/// and `map_buffer` is the buffer being mapped, which will be scheduled for
/// mapping after the submission completes.
fn submit_inner(
&self,
command_buffers: &[Arc<CommandBuffer>],
map_buffer: Option<Arc<Buffer>>,
snatch_guard: SnatchGuard,
) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
let submit_index;

let res = 'error: {
let snatch_guard = self.device.snatchable_lock.read();

// Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
let mut fence = self.device.fence.write();

Expand Down Expand Up @@ -1464,13 +1517,15 @@ impl Queue {

profiling::scope!("cleanup");

// this will register the new submission to the life time tracker
// This will register the new submission to the life time tracker,
// including scheduling mapping of `map_buffer`, if applicable.
self.lock_life()
.track_submission(submit_index, active_executions);
.track_submission(submit_index, active_executions, map_buffer);
drop(pending_writes);

// This will schedule destruction of all resources that are no longer needed
// by the user but used in the command stream, among other things.
// `device.maintain` consumes and will release the snatch guard.
let fence_guard = RwLockWriteGuard::downgrade(fence);
let (closures, result) =
self.device
Expand Down
78 changes: 52 additions & 26 deletions wgpu-core/src/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ pub enum BufferAccessError {
Failed,
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error("An error occurred while flushing pending writes to the buffer: {0}")]
QueueSubmit(String),
#[error("Buffer is already mapped")]
AlreadyMapped,
#[error("Buffer map is pending")]
Expand Down Expand Up @@ -329,6 +331,8 @@ impl WebGpuError for BufferAccessError {
Self::InvalidResource(e) => e.webgpu_error_type(),
Self::DestroyedResource(e) => e.webgpu_error_type(),

Self::QueueSubmit(_) => ErrorType::Internal,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spec allows the internal variant to only be returned by pipeline creation.


Self::Failed
| Self::AlreadyMapped
| Self::MapAlreadyPending
Expand Down Expand Up @@ -654,50 +658,72 @@ impl Buffer {
return Err((op, e.into()));
}

{
// `submit_index` will be set to:
// - `Some(index)`, if there is a submission the mapping operation must wait for.
// - `Some(0)`, if we have a queue and there is no submission to wait for.
// - `None`, if we don't have a queue.
let submit_index = {
let snatch_guard = device.snatchable_lock.read();
if let Err(e) = self.check_destroyed(&snatch_guard) {
return Err((op, e.into()));
}
}

{
let map_state = &mut *self.map_state.lock();
*map_state = match *map_state {
BufferMapState::Init { .. } | BufferMapState::Active { .. } => {
return Err((op, BufferAccessError::AlreadyMapped));
}
BufferMapState::Waiting(_) => {
return Err((op, BufferAccessError::MapAlreadyPending));
}
BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping {
range: offset..end_offset,
op,
_parent_buffer: self.clone(),
}),
};
}
// Review note: the code previously dropped the snatch lock here. I don't see how
// that was correct, if we drop it then the buffer could be destroyed.
Comment on lines +671 to +672
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it was OK because we were not using the raw hal buffer for any operation, but we were changing state on the core buffer, so maybe we could have gotten into an unexpected situation.


{
let map_state = &mut *self.map_state.lock();
*map_state = match *map_state {
BufferMapState::Init { .. } | BufferMapState::Active { .. } => {
return Err((op, BufferAccessError::AlreadyMapped));
}
BufferMapState::Waiting(_) => {
return Err((op, BufferAccessError::MapAlreadyPending));
}
BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping {
range: offset..end_offset,
op,
_parent_buffer: self.clone(),
}),
};
}

if let Some(queue) = device.get_queue().as_ref() {
Some(match queue.flush_writes_for_buffer(self, snatch_guard) {
Err(err) => {
let state = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle);
let BufferMapState::Waiting(BufferPendingMapping { op, .. }) = state else {
unreachable!();
};
return Err((op, err));
}
Ok(Some(submit_index)) => submit_index,
Ok(None) => queue.lock_life().map(self).unwrap_or(0),
})
} else {
None
}
};

// TODO: we are ignoring the transition here, I think we need to add a barrier
// at the end of the submission
// TODO(https://github.com/gfx-rs/wgpu/issues/9306): we are ignoring the transition
// here, I think we need to add a barrier at the end of the submission
device
.trackers
.lock()
.buffers
.set_single(self, internal_use);

let submit_index = if let Some(queue) = device.get_queue() {
queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary
if let Some(index) = submit_index {
Ok(index)
} else {
// We don't have a queue, so go ahead and map the buffer.
// We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`.
let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap();
if let Some(callback) = operation.callback.take() {
callback(status);
}
0
};

Ok(submit_index)
Ok(0)
}
}

pub fn get_mapped_range(
Expand Down