-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Vulkan: wrap external buffers as regions #9110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,13 +78,21 @@ struct RegionIndexing { | |
| int32_t offset = 0; //< indexing offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) | ||
| }; | ||
|
|
||
| enum class MemoryRegionKind { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 'Kind' is pretty vague. Perhaps rename this enum to something like
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep I agree will change this. |
||
| AllocatorOwned, //< region metadata managed by allocator; handle points to native resource | ||
| CropAlias, //< alias metadata for a cropped view; owner points to root region | ||
| ExternalWrapped //< metadata supplied by caller for an external native resource | ||
| }; | ||
|
|
||
| // Client-facing struct for exchanging memory region allocation requests | ||
| struct MemoryRegion { | ||
| void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) or a pointer to region owning allocation | ||
| RegionAllocation allocation; //< allocation in parent block for region | ||
| RegionIndexing indexing; //< indexing adjustments for controlling access | ||
| bool dedicated = false; //< flag indicating whether allocation is one dedicated resource (or split/shared into other resources) | ||
| bool is_owner = true; //< flag indicating whether allocation is owned by this region, in which case handle is a native handle. Otherwise handle points to owning region of allocation. | ||
| MemoryRegionKind kind = MemoryRegionKind::AllocatorOwned; | ||
| MemoryRegion *owner = nullptr; | ||
| MemoryProperties properties; //< properties for the allocated region | ||
| }; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,10 +213,15 @@ extern "C" __attribute__((used)) void *halide_runtime_api_functions[] = { | |
| (void *)&halide_d3d12compute_release_context, | ||
| (void *)&halide_d3d12compute_run, | ||
| (void *)&halide_vulkan_acquire_context, | ||
| (void *)&halide_vulkan_detach_vk_buffer, | ||
| (void *)&halide_vulkan_device_interface, | ||
| (void *)&halide_vulkan_get_vk_buffer, | ||
| (void *)&halide_vulkan_get_vk_crop_offset, | ||
| (void *)&halide_vulkan_initialize_kernels, | ||
| (void *)&halide_vulkan_release_context, | ||
| (void *)&halide_vulkan_run, | ||
| (void *)&halide_vulkan_wrap_vk_buffer, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We only need one of these, remove the second. |
||
| (void *)&halide_vulkan_wrap_vk_buffer_with_offset, | ||
| (void *)&halide_webgpu_device_interface, | ||
| (void *)&halide_webgpu_initialize_kernels, | ||
| (void *)&halide_webgpu_finalize_kernels, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,92 @@ | |
| #include "vulkan_resources.h" | ||
|
|
||
| using namespace Halide::Runtime::Internal::Vulkan; | ||
| using Halide::Runtime::Internal::MemoryRegionKind; | ||
|
|
||
| // -------------------------------------------------------------------------- | ||
|
|
||
| namespace Halide { | ||
| namespace Runtime { | ||
| namespace Internal { | ||
| namespace Vulkan { | ||
|
|
||
| ALWAYS_INLINE const MemoryRegion *vk_region_root(const MemoryRegion *region) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't returning the root ... it's returning the owner. Rename it
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is this different than
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I needed an helper to follow crop aliases back to the owning region. A cropped buffer gets a MemoryRegion that is just metadata pointing back to the owning region. When we wrap buffers, we need to walk through any crop aliases to find the actual external wrapper region that owns the vkBuffer metadata and base offset. I think I encountered this specifically when dealing with nv21/nv12. |
||
| const MemoryRegion *current = region; | ||
| while (current != nullptr && current->kind == MemoryRegionKind::CropAlias) { | ||
| current = current->owner != nullptr ? current->owner : reinterpret_cast<MemoryRegion *>(current->handle); | ||
| } | ||
| return current; | ||
| } | ||
|
|
||
| ALWAYS_INLINE MemoryRegion *vk_region_root(MemoryRegion *region) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this ... overloading the same method is error prone, and the const casting is bad. |
||
| return const_cast<MemoryRegion *>(vk_region_root(static_cast<const MemoryRegion *>(region))); | ||
| } | ||
|
|
||
| ALWAYS_INLINE uint64_t vk_external_buffer_offset_bytes(void *user_context, const MemoryRegion *region) { | ||
| halide_debug_assert(user_context, region != nullptr); | ||
| const MemoryRegion *root = vk_region_root(region); | ||
| return (root != nullptr && root->kind == MemoryRegionKind::ExternalWrapped) ? root->allocation.offset : 0; | ||
| } | ||
|
|
||
| ALWAYS_INLINE uint64_t vk_total_buffer_offset_bytes(void *user_context, const MemoryRegion *region, halide_type_t type) { | ||
| return vk_external_buffer_offset_bytes(user_context, region) + (region->indexing.offset * type.bytes()); | ||
| } | ||
|
|
||
| ALWAYS_INLINE void vk_destroy_wrapped_region(MemoryRegion *region) { | ||
| if (region == nullptr) { | ||
| return; | ||
| } | ||
|
|
||
| MemoryRegion *root = vk_region_root(region); | ||
| if (root == nullptr) { | ||
| return; | ||
| } | ||
|
|
||
| if (region != root && region->kind == MemoryRegionKind::CropAlias) { | ||
| free(region); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You shouldn't be calling
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep agreed, you're right. This was host metadata tho, but I will use vk_host_malloc and vk_host_free |
||
| return; | ||
| } | ||
|
|
||
| if (root->kind != MemoryRegionKind::ExternalWrapped) { | ||
| return; | ||
| } | ||
|
|
||
| free(root->handle); | ||
| free(root); | ||
| } | ||
|
|
||
| ALWAYS_INLINE MemoryRegion *vk_create_wrapped_buffer_region(void *user_context, | ||
| halide_buffer_t *buf, | ||
| VkBuffer vk_buffer, | ||
| uint64_t offset) { | ||
| VkBuffer *native_handle = reinterpret_cast<VkBuffer *>(malloc(sizeof(VkBuffer))); | ||
| if (native_handle == nullptr) { | ||
| error(user_context) << "Vulkan: Failed to allocate wrapped buffer handle metadata.\n"; | ||
| return nullptr; | ||
| } | ||
|
|
||
| MemoryRegion *region = reinterpret_cast<MemoryRegion *>(malloc(sizeof(MemoryRegion))); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You shouldn't be calling |
||
| if (region == nullptr) { | ||
| free(native_handle); | ||
| error(user_context) << "Vulkan: Failed to allocate wrapped buffer region metadata.\n"; | ||
| return nullptr; | ||
| } | ||
|
|
||
| *native_handle = vk_buffer; | ||
| memset(region, 0, sizeof(MemoryRegion)); | ||
| region->handle = native_handle; | ||
| region->allocation.offset = offset; | ||
| region->allocation.size = buf->size_in_bytes(); | ||
| region->is_owner = true; | ||
| region->kind = MemoryRegionKind::ExternalWrapped; | ||
| region->owner = nullptr; | ||
| return region; | ||
| } | ||
|
|
||
| } // namespace Vulkan | ||
| } // namespace Internal | ||
| } // namespace Runtime | ||
| } // namespace Halide | ||
|
|
||
| // -------------------------------------------------------------------------- | ||
|
|
||
|
|
@@ -110,12 +196,20 @@ WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_b | |
|
|
||
| // get the allocated region for the device | ||
| MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(halide_buffer->device); | ||
| #ifdef DEBUG_RUNTIME | ||
| const uint64_t device_region_size = device_region->allocation.size; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is only for printing a debug message, then move the declaration and the ifdef down below and wrap the |
||
| #endif | ||
| MemoryRegion *memory_region = ctx.allocator->owner_of(user_context, device_region); | ||
| if (ctx.allocator && memory_region && memory_region->handle) { | ||
| if (halide_can_reuse_device_allocations(user_context)) { | ||
| ctx.allocator->release(user_context, memory_region); | ||
| if (memory_region->kind == MemoryRegionKind::ExternalWrapped) { | ||
| debug(user_context) << "Vulkan: Releasing wrapped external buffer metadata only.\n"; | ||
| vk_destroy_wrapped_region(device_region); | ||
| } else { | ||
| ctx.allocator->reclaim(user_context, memory_region); | ||
| if (halide_can_reuse_device_allocations(user_context)) { | ||
| ctx.allocator->release(user_context, memory_region); | ||
| } else { | ||
| ctx.allocator->reclaim(user_context, memory_region); | ||
| } | ||
| } | ||
| } | ||
| halide_buffer->device = 0; | ||
|
|
@@ -126,7 +220,7 @@ WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_b | |
| debug(user_context) << "Vulkan: Released memory for device region (" | ||
| << "user_context: " << user_context << ", " | ||
| << "buffer: " << halide_buffer << ", " | ||
| << "size_in_bytes: " << (uint64_t)device_region->allocation.size << ")\n"; | ||
| << "size_in_bytes: " << device_region_size << ")\n"; | ||
|
|
||
| uint64_t t_after = halide_current_time_ns(user_context); | ||
| debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; | ||
|
|
@@ -272,15 +366,24 @@ WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) { | |
| size_t size = buf->size_in_bytes(); | ||
| if (buf->device) { | ||
| MemoryRegion *device_region = (MemoryRegion *)(buf->device); | ||
| if (device_region->allocation.size >= size) { | ||
| MemoryRegion *memory_region = ctx.allocator->owner_of(user_context, device_region); | ||
| if (memory_region != nullptr && memory_region->allocation.size >= size) { | ||
| debug(user_context) << "Vulkan: Requested allocation for existing device memory ... using existing buffer!\n"; | ||
| return halide_error_code_success; | ||
| } else { | ||
| if (memory_region == nullptr) { | ||
| error(user_context) << "Vulkan: Failed to retrieve memory region for existing device buffer!\n"; | ||
| return halide_error_code_internal_error; | ||
| } | ||
| if (memory_region->kind == MemoryRegionKind::ExternalWrapped) { | ||
| error(user_context) << "Vulkan: Wrapped external buffer is too small for requested allocation!\n"; | ||
| return halide_error_code_device_malloc_failed; | ||
| } | ||
| debug(user_context) << "Vulkan: Requested allocation of different size ... reallocating buffer!\n"; | ||
| if (halide_can_reuse_device_allocations(user_context)) { | ||
| ctx.allocator->release(user_context, device_region); | ||
| ctx.allocator->release(user_context, memory_region); | ||
| } else { | ||
| ctx.allocator->reclaim(user_context, device_region); | ||
| ctx.allocator->reclaim(user_context, memory_region); | ||
| } | ||
| buf->device = 0; | ||
| } | ||
|
|
@@ -487,7 +590,7 @@ WEAK int halide_vulkan_copy_to_device(void *user_context, halide_buffer_t *halid | |
| bool to_host = false; | ||
|
|
||
| uint64_t src_offset = copy_helper.src_begin; | ||
| uint64_t dst_offset = copy_helper.dst_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); | ||
| uint64_t dst_offset = copy_helper.dst_begin + vk_total_buffer_offset_bytes(user_context, device_region, halide_buffer->type); | ||
|
|
||
| copy_helper.src = (uint64_t)(staging_buffer); | ||
| copy_helper.dst = (uint64_t)(device_buffer); | ||
|
|
@@ -656,7 +759,7 @@ WEAK int halide_vulkan_copy_to_host(void *user_context, halide_buffer_t *halide_ | |
| bool from_host = false; | ||
| bool to_host = true; | ||
| uint64_t copy_dst = copy_helper.dst; | ||
| uint64_t src_offset = copy_helper.src_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); | ||
| uint64_t src_offset = copy_helper.src_begin + vk_total_buffer_offset_bytes(user_context, device_region, halide_buffer->type); | ||
| uint64_t dst_offset = copy_helper.dst_begin; | ||
|
|
||
| copy_helper.src = (uint64_t)(device_buffer); | ||
|
|
@@ -937,8 +1040,8 @@ WEAK int halide_vulkan_buffer_copy(void *user_context, struct halide_buffer_t *s | |
|
|
||
| // define the src and dst config | ||
| uint64_t copy_dst = copy_helper.dst; | ||
| uint64_t src_offset = copy_helper.src_begin + (src_buffer_region->indexing.offset * src->type.bytes()); | ||
| uint64_t dst_offset = copy_helper.dst_begin + (dst_buffer_region->indexing.offset * dst->type.bytes()); | ||
| uint64_t src_offset = copy_helper.src_begin + vk_total_buffer_offset_bytes(user_context, src_buffer_region, src->type); | ||
| uint64_t dst_offset = copy_helper.dst_begin + vk_total_buffer_offset_bytes(user_context, dst_buffer_region, dst->type); | ||
|
|
||
| copy_helper.src = (uint64_t)(src_device_buffer); | ||
| copy_helper.dst = (uint64_t)(dst_device_buffer); | ||
|
|
@@ -1345,18 +1448,37 @@ WEAK int halide_vulkan_device_and_host_free(void *user_context, struct halide_bu | |
| return halide_default_device_and_host_free(user_context, buf, &vulkan_device_interface); | ||
| } | ||
|
|
||
| WEAK int halide_vulkan_wrap_vk_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t vk_buffer) { | ||
| WEAK int halide_vulkan_wrap_vk_buffer_with_offset(void *user_context, | ||
| struct halide_buffer_t *buf, | ||
| uint64_t vk_buffer, | ||
| uint64_t offset) { | ||
| halide_debug_assert(user_context, buf->device == 0); | ||
| if (buf->device != 0) { | ||
| error(user_context) << "Vulkan: Unable to wrap buffer ... invalid device pointer!\n"; | ||
| return halide_error_code_device_wrap_native_failed; | ||
| } | ||
| buf->device = vk_buffer; | ||
| if (vk_buffer == 0) { | ||
| error(user_context) << "Vulkan: Unable to wrap buffer ... invalid VkBuffer handle!\n"; | ||
| return halide_error_code_device_wrap_native_failed; | ||
| } | ||
|
|
||
| MemoryRegion *region = vk_create_wrapped_buffer_region(user_context, buf, | ||
| reinterpret_cast<VkBuffer>(vk_buffer), | ||
| offset); | ||
| if (region == nullptr) { | ||
| return halide_error_code_out_of_memory; | ||
| } | ||
|
|
||
| buf->device = reinterpret_cast<uint64_t>(region); | ||
| buf->device_interface = &vulkan_device_interface; | ||
| buf->device_interface->impl->use_module(); | ||
| return halide_error_code_success; | ||
| } | ||
|
|
||
| WEAK int halide_vulkan_wrap_vk_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t vk_buffer) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need two methods for this. Remove this one, and rename the other one to |
||
| return halide_vulkan_wrap_vk_buffer_with_offset(user_context, buf, vk_buffer, 0); | ||
| } | ||
|
|
||
| WEAK int halide_vulkan_detach_vk_buffer(void *user_context, halide_buffer_t *buf) { | ||
| if (buf->device == 0) { | ||
| return halide_error_code_success; | ||
|
|
@@ -1365,6 +1487,13 @@ WEAK int halide_vulkan_detach_vk_buffer(void *user_context, halide_buffer_t *buf | |
| error(user_context) << "Vulkan: Unable to detach buffer ... invalid device interface!\n"; | ||
| return halide_error_code_incompatible_device_interface; | ||
| } | ||
| MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(buf->device); | ||
| MemoryRegion *root_region = vk_region_root(device_region); | ||
| if (root_region == nullptr || root_region->kind != MemoryRegionKind::ExternalWrapped) { | ||
| error(user_context) << "Vulkan: Unable to detach buffer ... buffer is not externally wrapped!\n"; | ||
| return halide_error_code_device_detach_native_failed; | ||
| } | ||
| vk_destroy_wrapped_region(device_region); | ||
| buf->device = 0; | ||
| buf->device_interface->impl->release_module(); | ||
| buf->device_interface = nullptr; | ||
|
|
@@ -1376,7 +1505,21 @@ WEAK uintptr_t halide_vulkan_get_vk_buffer(void *user_context, halide_buffer_t * | |
| return 0; | ||
| } | ||
| halide_debug_assert(user_context, buf->device_interface == &vulkan_device_interface); | ||
| return (uintptr_t)buf->device; | ||
| MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(buf->device); | ||
| MemoryRegion *root_region = vk_region_root(device_region); | ||
| if (root_region == nullptr || root_region->handle == nullptr) { | ||
| return 0; | ||
| } | ||
| return (uintptr_t)(*reinterpret_cast<VkBuffer *>(root_region->handle)); | ||
| } | ||
|
|
||
| WEAK uint64_t halide_vulkan_get_vk_crop_offset(void *user_context, halide_buffer_t *buf) { | ||
| if (buf->device == 0) { | ||
| return 0; | ||
| } | ||
| halide_debug_assert(user_context, buf->device_interface == &vulkan_device_interface); | ||
| MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(buf->device); | ||
| return vk_total_buffer_offset_bytes(user_context, device_region, buf->type); | ||
| } | ||
|
|
||
| WEAK const struct halide_device_interface_t *halide_vulkan_device_interface() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need two methods for this ... just keep this one with the offset parameter and rename it
halide_vulkan_wrap_vk_bufferand delete the other one.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yea I agree. I did the split because I wanted to preserve the zero-offset call.