libmir · Sukuna0007Abhi · Mar 31, 2026
diff --git a/source/dcompute/driver/backend.d b/source/dcompute/driver/backend.d
@@ -4,4 +4,5 @@ enum Backend
 {
     OpenCL120,
     CUDA650,
+    Metal300,
 }
diff --git a/source/dcompute/driver/metal/buffer.d b/source/dcompute/driver/metal/buffer.d
@@ -0,0 +1,62 @@
+/**
+ * Metal GPU buffer abstraction.
+ *
+ * Wraps MTLBuffer to provide GPU memory allocation with shared or
+ * device-only storage modes. Mirrors dcompute.driver.cuda.buffer.
+ */
+module dcompute.driver.metal.buffer;
+
+version (OSX):
+
+/// Metal resource storage modes.
+/// These map directly to MTLResourceOptions / MTLStorageMode.
+enum StorageMode
+{
+    /// CPU and GPU can both read/write. No explicit sync needed.
+    /// Best for small buffers or buffers that change every frame.
+    shared_,
+
+    /// GPU-only memory. Faster GPU access but requires explicit
+    /// copies to/from CPU-accessible staging buffers.
+    private_,
+
+    /// CPU-writable, GPU-readable. The driver manages coherency.
+    /// Only available on macOS (not iOS).
+    managed,
+}
+
+/// A typed GPU buffer backed by MTLBuffer.
+struct MetalBuffer(T)
+{
+    // Opaque handle to MTLBuffer
+    private void* _handle;
+    private size_t _count;
+
+    /// Number of elements in this buffer
+    @property size_t length() const { return _count; }
+
+    /// Size in bytes
+    @property size_t sizeBytes() const { return _count * T.sizeof; }
+
+    /// Get a CPU-side slice of the buffer contents.
+    /// Only valid for shared or managed storage mode.
+    @property T[] contents()
+    {
+        // TODO: cast MTLBuffer.contents to T* and slice
+        return null;
+    }
+
+    /// Copy data from a host array into this buffer.
+    void upload(const T[] data)
+    {
+        assert(data.length <= _count, "Source data exceeds buffer size");
+        // TODO: memcpy into MTLBuffer.contents
+    }
+
+    /// Copy data from this buffer into a host array.
+    void download(T[] dest)
+    {
+        assert(dest.length <= _count, "Destination array too small");
+        // TODO: memcpy from MTLBuffer.contents
+    }
+}
diff --git a/source/dcompute/driver/metal/device.d b/source/dcompute/driver/metal/device.d
@@ -0,0 +1,57 @@
+/**
+ * Metal device abstraction.
+ *
+ * Wraps MTLDevice to provide GPU device discovery and info queries,
+ * matching the interface of dcompute.driver.cuda.device.
+ */
+module dcompute.driver.metal.device;
+
+version (OSX):
+
+/// Represents a Metal-capable GPU device.
+/// On Apple Silicon, there is typically one device (the integrated GPU).
+/// On Intel Macs with discrete GPUs, there may be multiple.
+struct MetalDevice
+{
+    // Opaque handle to the underlying MTLDevice
+    // Will be populated via extern(Objective-C) bindings from metal-d
+    private void* _handle;
+
+    /// Returns the device name (e.g. "Apple M1", "AMD Radeon Pro 5500M")
+    @property string name()
+    {
+        // TODO: call [_handle name] via extern(Objective-C)
+        return "Metal Device (stub)";
+    }
+
+    /// Returns true if this device supports the Metal GPU family needed
+    /// for compute shaders
+    @property bool supportsCompute()
+    {
+        // All Metal devices support compute shaders
+        return true;
+    }
+
+    /// Maximum threads per threadgroup (typically 1024 on Apple Silicon)
+    @property size_t maxThreadsPerThreadgroup()
+    {
+        // TODO: query MTLDevice.maxThreadsPerThreadgroup
+        return 1024;
+    }
+
+    /// Maximum buffer length in bytes
+    @property size_t maxBufferLength()
+    {
+        // TODO: query MTLDevice.maxBufferLength
+        return 256 * 1024 * 1024; // 256 MB default
+    }
+}
+
+/// Get the system default Metal device.
+/// Returns null-state device if no Metal GPU is available.
+MetalDevice systemDefaultDevice()
+{
+    // TODO: wrap MTLCreateSystemDefaultDevice()
+    MetalDevice dev;
+    return dev;
+}
diff --git a/source/dcompute/driver/metal/package.d b/source/dcompute/driver/metal/package.d
@@ -0,0 +1,17 @@
+/**
+ * Metal compute driver for DCompute.
+ *
+ * Provides access to Apple Metal GPU compute on macOS via
+ * LDC's extern(Objective-C) support and the Inochi2D/metal-d bindings.
+ *
+ * This mirrors the structure of dcompute.driver.cuda and dcompute.driver.ocl.
+ * Only available on macOS (version(OSX)).
+ */
+module dcompute.driver.metal;
+
+version (OSX):
+
+public import dcompute.driver.metal.queue;
+public import dcompute.driver.metal.buffer;
+public import dcompute.driver.metal.program;
+public import dcompute.driver.metal.device;
diff --git a/source/dcompute/driver/metal/program.d b/source/dcompute/driver/metal/program.d
@@ -0,0 +1,62 @@
+/**
+ * Metal program/library abstraction.
+ *
+ * Wraps MTLLibrary to load precompiled .metallib files
+ * (produced by ldc2 -mdcompute-targets=metal-300 + metallib tool).
+ * Mirrors dcompute.driver.cuda.program.
+ */
+module dcompute.driver.metal.program;
+
+version (OSX):
+
+/// A compiled Metal library loaded from a .metallib file.
+/// Contains one or more compute kernel functions.
+struct MetalProgram
+{
+    // Opaque handle to MTLLibrary
+    private void* _library;
+
+    /// Whether this program has been loaded successfully
+    @property bool isLoaded() const
+    {
+        return _library !is null;
+    }
+
+    /// Get a kernel function by name from this library.
+    /// Returns an opaque handle to MTLFunction.
+    void* getFunction(string name)
+    {
+        // TODO: _library.newFunctionWithName(name.toNSString)
+        return null;
+    }
+
+    /// List all function names in this library.
+    /// Useful for debugging which kernels were compiled.
+    string[] functionNames()
+    {
+        // TODO: _library.functionNames
+        return [];
+    }
+}
+
+/// Load a compiled .metallib file from disk.
+///
+/// The .metallib file is produced by the pipeline:
+///   ldc2 -mdcompute-targets=metal-300 kernel.d  →  kernel.air
+///   metallib kernel.air -o kernel.metallib
+///
+/// Params:
+///   path = path to the .metallib file
+/// Returns:
+///   a MetalProgram ready for kernel dispatch
+MetalProgram loadLibrary(string path)
+{
+    // TODO: Implementation outline:
+    // 1. auto device = MTLCreateSystemDefaultDevice();
+    // 2. auto url = NSURL.fileURLWithPath(path.toNSString);
+    // 3. NSError* err;
+    // 4. auto lib = device.newLibraryWithURL(url, &err);
+    // 5. enforce(lib !is null, err.localizedDescription);
+    MetalProgram prog;
+    return prog;
+}
diff --git a/source/dcompute/driver/metal/queue.d b/source/dcompute/driver/metal/queue.d
@@ -0,0 +1,75 @@
+/**
+ * Metal command queue and kernel dispatch.
+ *
+ * Wraps MTLCommandQueue / MTLCommandBuffer / MTLComputeCommandEncoder
+ * to provide kernel dispatch, matching dcompute.driver.cuda.queue.
+ */
+module dcompute.driver.metal.queue;
+
+version (OSX):
+
+import dcompute.driver.metal.buffer;
+import dcompute.driver.metal.program;
+
+/// Grid dimensions for kernel dispatch (threadgroups × threads-per-threadgroup).
+/// Equivalent to CUDA's grid/block dimensions.
+struct MTLSize
+{
+    size_t width  = 1;
+    size_t height = 1;
+    size_t depth  = 1;
+}
+
+/// A Metal command queue for submitting compute work to the GPU.
+struct MetalQueue
+{
+    // Opaque handles
+    private void* _device;       // MTLDevice*
+    private void* _commandQueue; // MTLCommandQueue*
+
+    /// Dispatch a compute kernel.
+    ///
+    /// Params:
+    ///   program  = compiled Metal program containing the kernel
+    ///   kernel   = name of the kernel function
+    ///   grid     = number of threadgroups
+    ///   tgSize   = threads per threadgroup
+    void dispatch(ref MetalProgram program, string kernel,
+                  MTLSize grid, MTLSize tgSize)
+    {
+        // TODO: Implementation outline:
+        // 1. auto fn = program.getFunction(kernel);
+        // 2. auto pso = _device.newComputePipelineStateWithFunction(fn);
+        // 3. auto cmdBuf = _commandQueue.commandBuffer();
+        // 4. auto encoder = cmdBuf.computeCommandEncoder();
+        // 5. encoder.setComputePipelineState(pso);
+        // 6. encoder.dispatchThreadgroups(grid, tgSize);
+        // 7. encoder.endEncoding();
+        // 8. cmdBuf.commit();
+    }
+
+    /// Dispatch and wait for completion (synchronous).
+    void dispatchSync(ref MetalProgram program, string kernel,
+                      MTLSize grid, MTLSize tgSize)
+    {
+        dispatch(program, kernel, grid, tgSize);
+        // TODO: cmdBuf.waitUntilCompleted();
+    }
+
+    /// Allocate a shared-memory buffer accessible by both CPU and GPU.
+    MetalBuffer!T allocate(T)(size_t count)
+    {
+        // TODO: _device.newBufferWithLength(count * T.sizeof, shared)
+        MetalBuffer!T buf;
+        buf._count = count;
+        return buf;
+    }
+}
+
+/// Create a command queue on the default Metal device.
+MetalQueue createQueue()
+{
+    // TODO: MTLCreateSystemDefaultDevice() -> newCommandQueue()
+    MetalQueue q;
+    return q;
+}