diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index b1720d0d3d997..915d97e7a56c8 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -752,6 +752,16 @@ if (onnxruntime_USE_WEBGPU) # ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/safari_polyfill.patch && + # The dawn_device_lost_keepalive.patch contains the following changes: + # + # - (private) Fix premature ABORT when device.lost fires in callUserCallback + # The device.lost handler was wrapped in callUserCallback without runtimeKeepalivePush/Pop, + # causing maybeExit() to trigger _exit(0) and set ABORT=true when runtimeKeepaliveCounter + # was 0. This silently dropped all subsequent WebGPU callbacks (e.g. requestAdapter), + # breaking session re-creation after device destruction. + # + ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_device_lost_keepalive.patch && + # The dawn_dxc_output_dir.patch contains the following changes: # # - (private) Fix DXC output directory for RelWithDebInfo and MinSizeRel configs @@ -762,6 +772,18 @@ if (onnxruntime_USE_WEBGPU) # ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_dxc_output_dir.patch && + # The dawn_buffer_fix_injection.patch contains the following changes: + # + # - (private) Fix importJsBuffer calling wrong WGPUBufferImpl constructor + # Without this patch, importJsBuffer calls emwgpuCreateBuffer which invokes the + # (source, mappedAtCreation=false) constructor instead of the injection constructor + # tagged with kImportedFromJS. This patch adjusts the injection constructor signature + # to disambiguate it from the (source, mappedAtCreation) overload so emwgpuCreateBuffer + # reliably selects the injection constructor and imported buffers are properly tagged + # as kImportedFromJS. 
+ # + ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_buffer_fix_injection.patch && + # Remove the test folder to speed up potential file scan operations (70k+ files not needed for build). # Using <SOURCE_DIR> token ensures the correct absolute path regardless of working directory. ${CMAKE_COMMAND} -E rm -rf <SOURCE_DIR>/test) diff --git a/cmake/patches/dawn/dawn_buffer_fix_injection.patch b/cmake/patches/dawn/dawn_buffer_fix_injection.patch new file mode 100644 index 0000000000000..5546311a9901f --- /dev/null +++ b/cmake/patches/dawn/dawn_buffer_fix_injection.patch @@ -0,0 +1,35 @@ +diff --git a/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp b/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp +--- a/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp ++++ b/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp +@@ -749,7 +749,7 @@ struct WGPUBufferImpl final : public EventSource, + public: + WGPUBufferImpl(const EventSource* source, bool mappedAtCreation); + // Injection constructor used when we already have a backing Buffer. 
+- WGPUBufferImpl(const EventSource* source, WGPUBufferMapState mapState); ++ WGPUBufferImpl(const EventSource* source); + ~WGPUBufferImpl(); + + void Destroy(); +@@ -1301,7 +1301,7 @@ WGPUAdapter emwgpuCreateAdapter(const EventSource* source) { + } + + WGPUBuffer emwgpuCreateBuffer(const EventSource* source) { +- return ReturnToAPI(AcquireRef(new WGPUBufferImpl(source, false))); ++ return ReturnToAPI(AcquireRef(new WGPUBufferImpl(source))); + } + + WGPUDevice emwgpuCreateDevice(const EventSource* source, WGPUQueue queue) { +@@ -1441,11 +1441,10 @@ WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation) + } + } + +-WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, +- WGPUBufferMapState mapState) ++WGPUBufferImpl::WGPUBufferImpl(const EventSource* source) + : EventSource(source), + RefCountedWithExternalCount(kImportedFromJS), +- mMapState(mapState) {} ++ mMapState(WGPUBufferMapState_Unmapped) {} + + WGPUBufferImpl::~WGPUBufferImpl() { + if (!IsImported()) { diff --git a/cmake/patches/dawn/dawn_device_lost_keepalive.patch b/cmake/patches/dawn/dawn_device_lost_keepalive.patch new file mode 100644 index 0000000000000..1d522c2e7ba71 --- /dev/null +++ b/cmake/patches/dawn/dawn_device_lost_keepalive.patch @@ -0,0 +1,22 @@ +diff --git a/third_party/emdawnwebgpu/pkg/webgpu/src/library_webgpu.js b/third_party/emdawnwebgpu/pkg/webgpu/src/library_webgpu.js +--- a/third_party/emdawnwebgpu/pkg/webgpu/src/library_webgpu.js ++++ b/third_party/emdawnwebgpu/pkg/webgpu/src/library_webgpu.js +@@ -876,7 +876,9 @@ + #if ASSERTIONS + assert(deviceLostFutureId); + #endif +- // Don't keepalive here, because this isn't guaranteed to ever happen. ++ // Keep the runtime alive until device.lost resolves, to prevent ++ // maybeExit() from triggering premature ABORT during callUserCallback. 
++ {{{ runtimeKeepalivePush() }}} + WebGPU.Internals.futureInsert(deviceLostFutureId, device.lost.then((info) => { + // If the runtime has exited, avoid calling callUserCallback as it + // will print an error (e.g. if the device got freed during shutdown). +@@ -892,6 +894,7 @@ + {{{ gpu.passAsPointer('messagePtr') }}}); + stackRestore(sp); + }); ++ {{{ runtimeKeepalivePop() }}} + })); + + // Set up uncaptured error handlers.