Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions source/dcompute/driver/backend.d
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ enum Backend
{
OpenCL120,
CUDA650,
Metal300,
}
62 changes: 62 additions & 0 deletions source/dcompute/driver/metal/buffer.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Metal GPU buffer abstraction.
*
* Wraps MTLBuffer to provide GPU memory allocation with shared or
* device-only storage modes. Mirrors dcompute.driver.cuda.buffer.
*/
module dcompute.driver.metal.buffer;

version (OSX):

/// Metal resource storage modes.
/// These map directly to MTLResourceOptions / MTLStorageMode.
enum StorageMode
{
/// CPU and GPU can both read/write. No explicit sync needed.
/// Best for small buffers or buffers that change every frame.
shared_,

/// GPU-only memory. Faster GPU access but requires explicit
/// copies to/from CPU-accessible staging buffers.
private_,

/// CPU-writable, GPU-readable. The driver manages coherency.
/// Only available on macOS (not iOS).
managed,
}

/// A typed GPU buffer backed by MTLBuffer.
struct MetalBuffer(T)
{
// Opaque handle to MTLBuffer
private void* _handle;
private size_t _count;

/// Number of elements in this buffer
@property size_t length() const { return _count; }

/// Size in bytes
@property size_t sizeBytes() const { return _count * T.sizeof; }

/// Get a CPU-side slice of the buffer contents.
/// Only valid for shared or managed storage mode.
@property T[] contents()
{
// TODO: cast MTLBuffer.contents to T* and slice
return null;
}

/// Copy data from a host array into this buffer.
void upload(const T[] data)
{
assert(data.length <= _count, "Source data exceeds buffer size");
// TODO: memcpy into MTLBuffer.contents
}

/// Copy data from this buffer into a host array.
void download(T[] dest)
{
assert(dest.length <= _count, "Destination array too small");
// TODO: memcpy from MTLBuffer.contents
}
}
57 changes: 57 additions & 0 deletions source/dcompute/driver/metal/device.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Metal device abstraction.
*
* Wraps MTLDevice to provide GPU device discovery and info queries,
* matching the interface of dcompute.driver.cuda.device.
*/
module dcompute.driver.metal.device;

version (OSX):

/// Represents a Metal-capable GPU device.
/// On Apple Silicon, there is typically one device (the integrated GPU).
/// On Intel Macs with discrete GPUs, there may be multiple.
struct MetalDevice
{
// Opaque handle to the underlying MTLDevice
// Will be populated via extern(Objective-C) bindings from metal-d
private void* _handle;

/// Returns the device name (e.g. "Apple M1", "AMD Radeon Pro 5500M")
@property string name()
{
// TODO: call [_handle name] via extern(Objective-C)
return "Metal Device (stub)";
}

/// Returns true if this device supports the Metal GPU family needed
/// for compute shaders
@property bool supportsCompute()
{
// All Metal devices support compute shaders
return true;
}

/// Maximum threads per threadgroup (typically 1024 on Apple Silicon)
@property size_t maxThreadsPerThreadgroup()
{
// TODO: query MTLDevice.maxThreadsPerThreadgroup
return 1024;
}

/// Maximum buffer length in bytes
@property size_t maxBufferLength()
{
// TODO: query MTLDevice.maxBufferLength
return 256 * 1024 * 1024; // 256 MB default
}
}

/// Get the system default Metal device.
/// Returns null-state device if no Metal GPU is available.
MetalDevice systemDefaultDevice()
{
// TODO: wrap MTLCreateSystemDefaultDevice()
MetalDevice dev;
return dev;
}
17 changes: 17 additions & 0 deletions source/dcompute/driver/metal/package.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Metal compute driver for DCompute.
*
* Provides access to Apple Metal GPU compute on macOS via
* LDC's extern(Objective-C) support and the Inochi2D/metal-d bindings.
*
* This mirrors the structure of dcompute.driver.cuda and dcompute.driver.ocl.
* Only available on macOS (version(OSX)).
*/
module dcompute.driver.metal;

version (OSX):

public import dcompute.driver.metal.queue;
public import dcompute.driver.metal.buffer;
public import dcompute.driver.metal.program;
public import dcompute.driver.metal.device;
62 changes: 62 additions & 0 deletions source/dcompute/driver/metal/program.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Metal program/library abstraction.
*
* Wraps MTLLibrary to load precompiled .metallib files
* (produced by ldc2 -mdcompute-targets=metal-300 + metallib tool).
* Mirrors dcompute.driver.cuda.program.
*/
module dcompute.driver.metal.program;

version (OSX):

/// A compiled Metal library loaded from a .metallib file.
/// Contains one or more compute kernel functions.
struct MetalProgram
{
// Opaque handle to MTLLibrary
private void* _library;

/// Whether this program has been loaded successfully
@property bool isLoaded() const
{
return _library !is null;
}

/// Get a kernel function by name from this library.
/// Returns an opaque handle to MTLFunction.
void* getFunction(string name)
{
// TODO: _library.newFunctionWithName(name.toNSString)
return null;
}

/// List all function names in this library.
/// Useful for debugging which kernels were compiled.
string[] functionNames()
{
// TODO: _library.functionNames
return [];
}
}

/// Load a compiled .metallib file from disk.
///
/// The .metallib file is produced by the pipeline:
/// ldc2 -mdcompute-targets=metal-300 kernel.d → kernel.air
/// metallib kernel.air -o kernel.metallib
///
/// Params:
/// path = path to the .metallib file
/// Returns:
/// a MetalProgram ready for kernel dispatch
MetalProgram loadLibrary(string path)
{
// TODO: Implementation outline:
// 1. auto device = MTLCreateSystemDefaultDevice();
// 2. auto url = NSURL.fileURLWithPath(path.toNSString);
// 3. NSError* err;
// 4. auto lib = device.newLibraryWithURL(url, &err);
// 5. enforce(lib !is null, err.localizedDescription);
MetalProgram prog;
return prog;
}
75 changes: 75 additions & 0 deletions source/dcompute/driver/metal/queue.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
* Metal command queue and kernel dispatch.
*
* Wraps MTLCommandQueue / MTLCommandBuffer / MTLComputeCommandEncoder
* to provide kernel dispatch, matching dcompute.driver.cuda.queue.
*/
module dcompute.driver.metal.queue;

version (OSX):

import dcompute.driver.metal.buffer;
import dcompute.driver.metal.program;

/// Grid dimensions for kernel dispatch (threadgroups × threads-per-threadgroup).
/// Equivalent to CUDA's grid/block dimensions.
struct MTLSize
{
size_t width = 1;
size_t height = 1;
size_t depth = 1;
}

/// A Metal command queue for submitting compute work to the GPU.
struct MetalQueue
{
// Opaque handles
private void* _device; // MTLDevice*
private void* _commandQueue; // MTLCommandQueue*

/// Dispatch a compute kernel.
///
/// Params:
/// program = compiled Metal program containing the kernel
/// kernel = name of the kernel function
/// grid = number of threadgroups
/// tgSize = threads per threadgroup
void dispatch(ref MetalProgram program, string kernel,
MTLSize grid, MTLSize tgSize)
{
// TODO: Implementation outline:
// 1. auto fn = program.getFunction(kernel);
// 2. auto pso = _device.newComputePipelineStateWithFunction(fn);
// 3. auto cmdBuf = _commandQueue.commandBuffer();
// 4. auto encoder = cmdBuf.computeCommandEncoder();
// 5. encoder.setComputePipelineState(pso);
// 6. encoder.dispatchThreadgroups(grid, tgSize);
// 7. encoder.endEncoding();
// 8. cmdBuf.commit();
}

/// Dispatch and wait for completion (synchronous).
void dispatchSync(ref MetalProgram program, string kernel,
MTLSize grid, MTLSize tgSize)
{
dispatch(program, kernel, grid, tgSize);
// TODO: cmdBuf.waitUntilCompleted();
}

/// Allocate a shared-memory buffer accessible by both CPU and GPU.
MetalBuffer!T allocate(T)(size_t count)
{
// TODO: _device.newBufferWithLength(count * T.sizeof, shared)
MetalBuffer!T buf;
buf._count = count;
return buf;
}
}

/// Create a command queue on the default Metal device.
MetalQueue createQueue()
{
// TODO: MTLCreateSystemDefaultDevice() -> newCommandQueue()
MetalQueue q;
return q;
}