gfx-rs · 39ali · Apr 5, 2026 · andyleiserson · Apr 8, 2026 · 39ali
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
@@ -1,3 +1,4 @@
+use objc2::rc::autoreleasepool;
 use objc2::runtime::{AnyObject, ProtocolObject, Sel};
 use objc2::{available, sel};
 use objc2_foundation::{NSOperatingSystemVersion, NSProcessInfo};
@@ -9,6 +10,7 @@ use objc2_metal::{
 use wgt::{AstcBlock, AstcChannel};
 
 use alloc::{string::ToString as _, sync::Arc, vec::Vec};
+use core::mem::ManuallyDrop;
 use core::sync::atomic;
 
 use crate::metal::QueueShared;
@@ -79,52 +81,54 @@ impl crate::Adapter for super::Adapter {
         limits: &wgt::Limits,
         _memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
-        let queue = self
-            .shared
-            .device
-            .newCommandQueueWithMaxCommandBufferCount(MAX_COMMAND_BUFFERS)
-            .unwrap();
-
-        // Acquiring the meaning of timestamp ticks is hard with Metal!
-        // The only thing there is a method correlating cpu & gpu timestamps (`device.sample_timestamps`).
-        // Users are supposed to call this method twice and calculate the difference,
-        // see "Converting GPU Timestamps into CPU Time":
-        // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time
-        // Not only does this mean we get an approximate value, this is as also *very slow*!
-        // Chromium opted to solve this using a linear regression that they stop at some point
-        // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46
-        // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values.
-        //
-        // We should do as Chromium does for the general case, but this requires quite some state tracking
-        // and doesn't even provide perfectly accurate values, especially at the start of the application when
-        // we didn't have the chance to sample a lot of values just yet.
-        //
-        // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps
-        // conversions on different devices, after all Metal isn't supported on that many ;)
-        // Based on:
-        // * https://github.com/gfx-rs/wgpu/pull/2528
-        // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326
-        let timestamp_period = if self.shared.device.name().to_string().starts_with("Intel") {
-            83.333
-        } else {
-            // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs.
-            1.0
-        };
-
-        Ok(crate::OpenDevice {
-            device: super::Device {
-                shared: Arc::clone(&self.shared),
-                features,
-                counters: Default::default(),
-                limits: limits.clone(),
-            },
-            queue: super::Queue {
-                shared: Arc::new(QueueShared {
-                    raw: queue,
-                    command_buffer_created_not_submitted: atomic::AtomicUsize::new(0),
-                }),
-                timestamp_period,
-            },
+        autoreleasepool(|_| {
+            let queue = self
+                .shared
+                .device
+                .newCommandQueueWithMaxCommandBufferCount(MAX_COMMAND_BUFFERS)
+                .unwrap();
+
+            // Acquiring the meaning of timestamp ticks is hard with Metal!
+            // The only thing there is a method correlating cpu & gpu timestamps (`device.sample_timestamps`).
+            // Users are supposed to call this method twice and calculate the difference,
+            // see "Converting GPU Timestamps into CPU Time":
+            // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time
+            // Not only does this mean we get an approximate value, this is as also *very slow*!
+            // Chromium opted to solve this using a linear regression that they stop at some point
+            // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46
+            // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values.
+            //
+            // We should do as Chromium does for the general case, but this requires quite some state tracking
+            // and doesn't even provide perfectly accurate values, especially at the start of the application when
+            // we didn't have the chance to sample a lot of values just yet.
+            //
+            // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps
+            // conversions on different devices, after all Metal isn't supported on that many ;)
+            // Based on:
+            // * https://github.com/gfx-rs/wgpu/pull/2528
+            // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326
+            let timestamp_period = if self.shared.device.name().to_string().starts_with("Intel") {
+                83.333
+            } else {
+                // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs.
+                1.0
+            };
+
+            Ok(crate::OpenDevice {
+                device: super::Device {
+                    shared: Arc::clone(&self.shared),
+                    features,
+                    counters: Default::default(),
+                    limits: limits.clone(),
+                },
+                queue: super::Queue {
+                    shared: Arc::new(QueueShared {
+                        raw: ManuallyDrop::new(queue),
+                        command_buffer_created_not_submitted: atomic::AtomicUsize::new(0),
+                    }),
+                    timestamp_period,
+                },
+            })
         })
     }
 

diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
@@ -449,8 +449,11 @@ impl crate::Device for super::Device {
             })
         })
     }
-    unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {
+    unsafe fn destroy_buffer(&self, buffer: super::Buffer) {
         self.counters.buffers.sub(1);
+        autoreleasepool(|_| {
+            drop(buffer);
+        });
     }
 
     unsafe fn add_raw_buffer(&self, _buffer: &super::Buffer) {

diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
@@ -29,7 +29,7 @@ mod surface;
 mod time;
 
 use alloc::{string::ToString as _, sync::Arc, vec::Vec};
-use core::{fmt, iter, ops, ptr::NonNull, sync::atomic};
+use core::{fmt, iter, mem::ManuallyDrop, ops, ptr::NonNull, sync::atomic};
 
 use bitflags::bitflags;
 use hashbrown::HashMap;
@@ -411,27 +411,29 @@ impl AdapterShared {
     }
 
     fn expose(device: Retained<ProtocolObject<dyn MTLDevice>>) -> crate::ExposedAdapter<Api> {
-        let name = device.name().to_string();
-        let capabilities_query = CapabilitiesQuery::new(&device);
-        let shared = AdapterShared::new(device, &capabilities_query);
-        let features = capabilities_query.features();
-        let capabilities = capabilities_query.capabilities();
-        crate::ExposedAdapter {
-            info: wgt::AdapterInfo {
-                name,
-                // These are hardcoded based on typical values for Metal devices
-                //
-                // See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
-                // for more information.
-                subgroup_min_size: 4,
-                subgroup_max_size: 64,
-                transient_saves_memory: shared.private_caps.supports_memoryless_storage,
-                ..wgt::AdapterInfo::new(shared.private_caps.device_type(), wgt::Backend::Metal)
-            },
-            features,
-            capabilities,
-            adapter: Adapter::new(Arc::new(shared)),
-        }
+        autoreleasepool(|_| {
+            let name = device.name().to_string();
+            let capabilities_query = CapabilitiesQuery::new(&device);
+            let shared = AdapterShared::new(device, &capabilities_query);
+            let features = capabilities_query.features();
+            let capabilities = capabilities_query.capabilities();
+            crate::ExposedAdapter {
+                info: wgt::AdapterInfo {
+                    name,
+                    // These are hardcoded based on typical values for Metal devices
+                    //
+                    // See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
+                    // for more information.
+                    subgroup_min_size: 4,
+                    subgroup_max_size: 64,
+                    transient_saves_memory: shared.private_caps.supports_memoryless_storage,
+                    ..wgt::AdapterInfo::new(shared.private_caps.device_type(), wgt::Backend::Metal)
+                },
+                features,
+                capabilities,
+                adapter: Adapter::new(Arc::new(shared)),
+            }
+        })
     }
 }
 
@@ -454,7 +456,7 @@ impl Queue {
     ) -> Self {
         Self {
             shared: Arc::new(QueueShared {
-                raw,
+                raw: ManuallyDrop::new(raw),
                 command_buffer_created_not_submitted: atomic::AtomicUsize::new(0),
             }),
             timestamp_period,
@@ -464,7 +466,7 @@ impl Queue {
 
 #[derive(Debug)]
 pub struct QueueShared {
-    raw: Retained<ProtocolObject<dyn MTLCommandQueue>>,
+    raw: ManuallyDrop<Retained<ProtocolObject<dyn MTLCommandQueue>>>,
     // Tracks command buffers created via `CommandEncoder::begin_encoding` that
     // have not yet been submitted or discarded. Used to proactively fail
     // before hitting Metal's `maxCommandBufferCount`.
@@ -475,6 +477,14 @@ pub struct QueueShared {
     command_buffer_created_not_submitted: atomic::AtomicUsize,
 }
 
+impl Drop for QueueShared {
+    fn drop(&mut self) {
+        autoreleasepool(|_| unsafe {
+            ManuallyDrop::drop(&mut self.raw);
+        });
+    }
+}
+
 pub struct Device {
     shared: Arc<AdapterShared>,
     features: wgt::Features,