diff --git a/source/dcompute/driver/backend.d b/source/dcompute/driver/backend.d
index b054b20..d1e86b3 100644
--- a/source/dcompute/driver/backend.d
+++ b/source/dcompute/driver/backend.d
@@ -4,4 +4,5 @@ enum Backend
 {
     OpenCL120,
     CUDA650,
+    Metal300,
 }
diff --git a/source/dcompute/driver/metal/buffer.d b/source/dcompute/driver/metal/buffer.d
new file mode 100644
index 0000000..c94858a
--- /dev/null
+++ b/source/dcompute/driver/metal/buffer.d
@@ -0,0 +1,64 @@
+/**
+ * Metal GPU buffer abstraction.
+ *
+ * Wraps MTLBuffer to provide GPU memory allocation with shared or
+ * device-only storage modes. Mirrors dcompute.driver.cuda.buffer.
+ */
+module dcompute.driver.metal.buffer;
+
+version (OSX):
+
+/// Metal resource storage modes.
+/// These map directly to MTLResourceOptions / MTLStorageMode.
+enum StorageMode
+{
+    /// CPU and GPU can both read/write. No explicit sync needed.
+    /// Best for small buffers or buffers that change every frame.
+    shared_,
+
+    /// GPU-only memory. Faster GPU access but requires explicit
+    /// copies to/from CPU-accessible staging buffers.
+    private_,
+
+    /// CPU-writable, GPU-readable. The driver manages coherency.
+    /// Only available on macOS (not iOS).
+    managed,
+}
+
+/// A typed GPU buffer backed by MTLBuffer.
+struct MetalBuffer(T)
+{
+    // Opaque handle to MTLBuffer
+    private void* _handle;
+    // `package` (not `private`): MetalQueue.allocate (queue.d) initializes
+    // _count when creating buffers; D `private` is module-scoped.
+    package size_t _count;
+
+    /// Number of elements in this buffer
+    @property size_t length() const { return _count; }
+
+    /// Size in bytes
+    @property size_t sizeBytes() const { return _count * T.sizeof; }
+
+    /// Get a CPU-side slice of the buffer contents.
+    /// Only valid for shared or managed storage mode.
+    @property T[] contents()
+    {
+        // TODO: cast MTLBuffer.contents to T* and slice
+        return null;
+    }
+
+    /// Copy data from a host array into this buffer.
+    void upload(const T[] data)
+    {
+        assert(data.length <= _count, "Source data exceeds buffer size");
+        // TODO: memcpy into MTLBuffer.contents
+    }
+
+    /// Copy data from this buffer into a host array.
+    void download(T[] dest)
+    {
+        assert(dest.length <= _count, "Destination array too small");
+        // TODO: memcpy from MTLBuffer.contents
+    }
+}
diff --git a/source/dcompute/driver/metal/device.d b/source/dcompute/driver/metal/device.d
new file mode 100644
index 0000000..2c63dd9
--- /dev/null
+++ b/source/dcompute/driver/metal/device.d
@@ -0,0 +1,57 @@
+/**
+ * Metal device abstraction.
+ *
+ * Wraps MTLDevice to provide GPU device discovery and info queries,
+ * matching the interface of dcompute.driver.cuda.device.
+ */
+module dcompute.driver.metal.device;
+
+version (OSX):
+
+/// Represents a Metal-capable GPU device.
+/// On Apple Silicon, there is typically one device (the integrated GPU).
+/// On Intel Macs with discrete GPUs, there may be multiple.
+struct MetalDevice
+{
+    // Opaque handle to the underlying MTLDevice
+    // Will be populated via extern(Objective-C) bindings from metal-d
+    private void* _handle;
+
+    /// Returns the device name (e.g. "Apple M1", "AMD Radeon Pro 5500M")
+    @property string name()
+    {
+        // TODO: call [_handle name] via extern(Objective-C)
+        return "Metal Device (stub)";
+    }
+
+    /// Returns true if this device supports the Metal GPU family needed
+    /// for compute shaders
+    @property bool supportsCompute()
+    {
+        // All Metal devices support compute shaders
+        return true;
+    }
+
+    /// Maximum threads per threadgroup (typically 1024 on Apple Silicon)
+    @property size_t maxThreadsPerThreadgroup()
+    {
+        // TODO: query MTLDevice.maxThreadsPerThreadgroup
+        return 1024;
+    }
+
+    /// Maximum buffer length in bytes
+    @property size_t maxBufferLength()
+    {
+        // TODO: query MTLDevice.maxBufferLength
+        return 256 * 1024 * 1024; // 256 MB default
+    }
+}
+
+/// Get the system default Metal device.
+/// Returns null-state device if no Metal GPU is available.
+MetalDevice systemDefaultDevice()
+{
+    // TODO: wrap MTLCreateSystemDefaultDevice()
+    MetalDevice dev;
+    return dev;
+}
diff --git a/source/dcompute/driver/metal/package.d b/source/dcompute/driver/metal/package.d
new file mode 100644
index 0000000..dbf847d
--- /dev/null
+++ b/source/dcompute/driver/metal/package.d
@@ -0,0 +1,17 @@
+/**
+ * Metal compute driver for DCompute.
+ *
+ * Provides access to Apple Metal GPU compute on macOS via
+ * LDC's extern(Objective-C) support and the Inochi2D/metal-d bindings.
+ *
+ * This mirrors the structure of dcompute.driver.cuda and dcompute.driver.ocl.
+ * Only available on macOS (version(OSX)).
+ */
+module dcompute.driver.metal;
+
+version (OSX):
+
+public import dcompute.driver.metal.queue;
+public import dcompute.driver.metal.buffer;
+public import dcompute.driver.metal.program;
+public import dcompute.driver.metal.device;
diff --git a/source/dcompute/driver/metal/program.d b/source/dcompute/driver/metal/program.d
new file mode 100644
index 0000000..7be7792
--- /dev/null
+++ b/source/dcompute/driver/metal/program.d
@@ -0,0 +1,62 @@
+/**
+ * Metal program/library abstraction.
+ *
+ * Wraps MTLLibrary to load precompiled .metallib files
+ * (produced by ldc2 -mdcompute-targets=metal-300 + metallib tool).
+ * Mirrors dcompute.driver.cuda.program.
+ */
+module dcompute.driver.metal.program;
+
+version (OSX):
+
+/// A compiled Metal library loaded from a .metallib file.
+/// Contains one or more compute kernel functions.
+struct MetalProgram
+{
+    // Opaque handle to MTLLibrary
+    private void* _library;
+
+    /// Whether this program has been loaded successfully
+    @property bool isLoaded() const
+    {
+        return _library !is null;
+    }
+
+    /// Get a kernel function by name from this library.
+    /// Returns an opaque handle to MTLFunction.
+    void* getFunction(string name)
+    {
+        // TODO: _library.newFunctionWithName(name.toNSString)
+        return null;
+    }
+
+    /// List all function names in this library.
+    /// Useful for debugging which kernels were compiled.
+    string[] functionNames()
+    {
+        // TODO: _library.functionNames
+        return [];
+    }
+}
+
+/// Load a compiled .metallib file from disk.
+///
+/// The .metallib file is produced by the pipeline:
+///     ldc2 -mdcompute-targets=metal-300 kernel.d → kernel.air
+///     metallib kernel.air -o kernel.metallib
+///
+/// Params:
+///     path = path to the .metallib file
+/// Returns:
+///     a MetalProgram ready for kernel dispatch
+MetalProgram loadLibrary(string path)
+{
+    // TODO: Implementation outline:
+    //   1. auto device = MTLCreateSystemDefaultDevice();
+    //   2. auto url = NSURL.fileURLWithPath(path.toNSString);
+    //   3. NSError* err;
+    //   4. auto lib = device.newLibraryWithURL(url, &err);
+    //   5. enforce(lib !is null, err.localizedDescription);
+    MetalProgram prog;
+    return prog;
+}
diff --git a/source/dcompute/driver/metal/queue.d b/source/dcompute/driver/metal/queue.d
new file mode 100644
index 0000000..45b3824
--- /dev/null
+++ b/source/dcompute/driver/metal/queue.d
@@ -0,0 +1,75 @@
+/**
+ * Metal command queue and kernel dispatch.
+ *
+ * Wraps MTLCommandQueue / MTLCommandBuffer / MTLComputeCommandEncoder
+ * to provide kernel dispatch, matching dcompute.driver.cuda.queue.
+ */
+module dcompute.driver.metal.queue;
+
+version (OSX):
+
+import dcompute.driver.metal.buffer;
+import dcompute.driver.metal.program;
+
+/// Grid dimensions for kernel dispatch (threadgroups × threads-per-threadgroup).
+/// Equivalent to CUDA's grid/block dimensions.
+struct MTLSize
+{
+    size_t width = 1;
+    size_t height = 1;
+    size_t depth = 1;
+}
+
+/// A Metal command queue for submitting compute work to the GPU.
+struct MetalQueue
+{
+    // Opaque handles
+    private void* _device;        // MTLDevice*
+    private void* _commandQueue;  // MTLCommandQueue*
+
+    /// Dispatch a compute kernel.
+    ///
+    /// Params:
+    ///     program = compiled Metal program containing the kernel
+    ///     kernel = name of the kernel function
+    ///     grid = number of threadgroups
+    ///     tgSize = threads per threadgroup
+    void dispatch(ref MetalProgram program, string kernel,
+                  MTLSize grid, MTLSize tgSize)
+    {
+        // TODO: Implementation outline:
+        //   1. auto fn = program.getFunction(kernel);
+        //   2. auto pso = _device.newComputePipelineStateWithFunction(fn);
+        //   3. auto cmdBuf = _commandQueue.commandBuffer();
+        //   4. auto encoder = cmdBuf.computeCommandEncoder();
+        //   5. encoder.setComputePipelineState(pso);
+        //   6. encoder.dispatchThreadgroups(grid, tgSize);
+        //   7. encoder.endEncoding();
+        //   8. cmdBuf.commit();
+    }
+
+    /// Dispatch and wait for completion (synchronous).
+    void dispatchSync(ref MetalProgram program, string kernel,
+                      MTLSize grid, MTLSize tgSize)
+    {
+        dispatch(program, kernel, grid, tgSize);
+        // TODO: cmdBuf.waitUntilCompleted();
+    }
+
+    /// Allocate a shared-memory buffer accessible by both CPU and GPU.
+    MetalBuffer!T allocate(T)(size_t count)
+    {
+        // TODO: _device.newBufferWithLength(count * T.sizeof, shared)
+        MetalBuffer!T buf;
+        buf._count = count; // relies on `package` visibility of _count in buffer.d
+        return buf;
+    }
+}
+
+/// Create a command queue on the default Metal device.
+MetalQueue createQueue()
+{
+    // TODO: MTLCreateSystemDefaultDevice() -> newCommandQueue()
+    MetalQueue q;
+    return q;
+}