diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4f22a4598d..84d0c5b36a 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -2590,6 +2590,13 @@ Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder) { + // Dot2/3/4 DXIL operations only support half and float, not double. + // For double vectors, expand the dot product using FMul and FMad. + if (arg0->getType()->getScalarType()->isDoubleTy()) { + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, + DXIL::OpCode::FMad); + } + switch (vecSize) { case 2: return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_double.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_double.hlsl new file mode 100644 index 0000000000..f4f33487ca --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_double.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -T cs_6_0 -E main -DFUNC=mul -DDIM=4 %s | FileCheck %s +// RUN: %dxc -T cs_6_0 -E main -DFUNC=mul -DDIM=3 %s | FileCheck %s +// RUN: %dxc -T cs_6_0 -E main -DFUNC=mul -DDIM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_0 -E main -DFUNC=dot -DDIM=4 %s | FileCheck %s +// RUN: %dxc -T cs_6_0 -E main -DFUNC=dot -DDIM=3 %s | FileCheck %s +// RUN: %dxc -T cs_6_0 -E main -DFUNC=dot -DDIM=2 %s | FileCheck %s + +// Verify that mul and dot of double vectors do not produce invalid DXIL Dot +// intrinsics (Dot2/3/4 only support half and float). Instead, the dot product +// should be expanded using FMul and FMad. +// %dxc runs validation, so the test implicitly verifies DXIL validity. 
+
+// CHECK-NOT: call double @dx.op.dot2.f64
+// CHECK-NOT: call double @dx.op.dot3.f64
+// CHECK-NOT: call double @dx.op.dot4.f64
+// CHECK: fmul fast double
+// CHECK: call double @dx.op.tertiary.f64(i32 46,
+
+#if DIM == 4
+typedef double4 DVec;
+#elif DIM == 3
+typedef double3 DVec;
+#else
+typedef double2 DVec;
+#endif
+
+RWStructuredBuffer<DVec> In;    // In[0] and In[1] are the two vector operands.
+RWStructuredBuffer<double> Out; // NOTE(review): <double> assumed (scalar result) - confirm.
+
+[numthreads(1, 1, 1)]
+void main() {
+  Out[0] = FUNC(In[0], In[1]); // FUNC is mul or dot, selected via -DFUNC.
+}