diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index ae4bcdb9eeb64..9b4613c853206 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -401,6 +401,9 @@ namespace clang { bool isOverloadFirstandLast() const { return Flags & IsOverloadFirstandLast; } + bool isOverloadDefaultAndOp0() const { + return Flags & IsOverloadDefaultAndOp0; + } bool isPrefetch() const { return Flags & IsPrefetch; } bool isReverseCompare() const { return Flags & ReverseCompare; } bool isAppendSVALL() const { return Flags & IsAppendSVALL; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ac9f9af30fce7..a5c84f163d8c9 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1001,6 +1001,17 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>; } + +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { +def SVCVTZN_S : SInst<"svcvtzn_{0}[_{1}_x2]", "y2.d", "hfd", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsReductionQV, VerifyRuntimeMode]>; +def SVCVTZN_U : SInst<"svcvtzn_{0}[_{1}_x2]", "e2.d", "hfd", MergeNone, "aarch64_sve_fcvtzun_x2", [IsReductionQV, VerifyRuntimeMode]>; + +foreach suffix = ["b", "t"] in { +def SVCVT # !toupper(suffix) # _S: SInst<"svcvt" # suffix # "_{d}[_{1}]", "dy", "hfd", MergeNone, "aarch64_sve_scvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>; +def SVCVT # !toupper(suffix) # _U: SInst<"svcvt" # suffix # "_{d}[_{1}]", "de", "hfd", MergeNone, "aarch64_sve_ucvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>; +} +} + //////////////////////////////////////////////////////////////////////////////// // Permutations and selection @@ -2535,4 +2546,4 @@ let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { def SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; -} \ No newline at end of file +} diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 7e60e87b12a4d..b1fbafa6e81e7 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -78,6 +78,7 @@ include "arm_immcheck_incl.td" // R: scalar of 1/2 width element type (splat to vector type) // r: scalar of 1/4 width element type (splat to vector type) // @: unsigned scalar of 1/4 width element type (splat to vector type) +// y: 1/2 width signed elements, 2x element count // e: 1/2 width unsigned elements, 2x element count // b: 1/4 width unsigned elements, 4x element count // h: 1/2 width elements, 2x element count @@ -215,6 +216,7 @@ def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does no def IsOverloadWhileOrMultiVecCvt : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. def IsOverloadFirstandLast : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. +def IsOverloadDefaultAndOp0 : FlagType<0x4000000000000>; // Use {default type, typeof(operand0)} as overloaded types. def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. def IsByteIndexed : FlagType<0x01000000>; def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index f8990ced2a577..6aa92a2361b56 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3939,11 +3939,13 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, if (TypeFlags.isOverloadWhileRW()) return {getSVEPredType(TypeFlags), Ops[0]->getType()}; + if (TypeFlags.isOverloadDefaultAndOp0()) + return {DefaultType, Ops[0]->getType()}; + if (TypeFlags.isOverloadFirstandLast()) return {Ops[0]->getType(), Ops.back()->getType()}; - if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() && - ResultType->isVectorTy()) + if (TypeFlags.isReductionQV()) return {ResultType, Ops[1]->getType()}; assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c new file mode 100644 index 0000000000000..ded4f3a02d2a0 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c @@ -0,0 +1,113 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svcvtzn_s8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svcvtzn_s8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svcvtzn_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s8,_f16_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_s16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint16_t test_svcvtzn_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s16,_f32_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_s32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint32_t test_svcvtzn_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s32,_f64_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svcvtzn_u8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svcvtzn_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u8,_f16_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint16_t test_svcvtzn_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u16,_f32_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint32_t test_svcvtzn_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u32,_f64_x2)(zn); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c new file mode 100644 index 0000000000000..c12a0fff786ef --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c @@ -0,0 +1,197 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svcvtb_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f16,_s8)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f32,_s16)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f64,_s32)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f16,_u8)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f32,_u16)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f64,_u32)(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f16,_s8)(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f32,_s16)(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f64,_s32)(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f16,_u8)(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f32,_u16)(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f64,_u32)(zn); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c index 34ead79e726ab..8f0a28b260d5d 100644 --- a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -14,6 +14,9 @@ void test(void) { int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -86,6 +89,42 @@ void test(void) { svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); @@ -171,6 +210,9 @@ void test_streaming(void) __arm_streaming{ int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -243,6 +285,42 @@ void test_streaming(void) __arm_streaming{ svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); @@ -328,6 +406,9 @@ void test_streaming_compatible(void) __arm_streaming_compatible{ int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -400,6 +481,42 @@ void test_streaming_compatible(void) __arm_streaming_compatible{ svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index accb7b240288f..0f770a3202538 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -697,6 +697,10 @@ void SVEType::applyModifier(char Mod) { Kind = UInt; ElementBitwidth /= 2; break; + case 'y': + Kind = SInt; + ElementBitwidth /= 2; + break; case 'h': ElementBitwidth /= 2; break; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 48c38fb2c2c9f..ec084626d5b2a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1048,6 +1048,7 @@ def llvm_nxv4i1_ty : LLVMType; def llvm_nxv8i1_ty : LLVMType; def llvm_nxv16i1_ty : LLVMType; def llvm_nxv16i8_ty : LLVMType; +def llvm_nxv8i16_ty : LLVMType; def llvm_nxv4i32_ty : LLVMType; def llvm_nxv2i64_ty : LLVMType; def llvm_nxv8f16_ty : LLVMType; @@ -2613,6 +2614,19 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +// +// SVE2 - Multi-vector narrowing convert to floating point +// + +def int_aarch64_sve_scvtfb + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_scvtft + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_ucvtfb + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_ucvtft + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; + // // SVE2 - Floating-point integer binary logarithm // @@ -4018,6 +4032,12 @@ let TargetPrefix = "aarch64" in { LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + + // + // SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point + // + def int_aarch64_sve_fcvtzsn_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_sve_fcvtzun_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 9df77f8e93c64..6a03b9abb3c19 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4858,14 +4858,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>; // SVE2 fp convert, narrow and interleave to integer, rounding toward zero - defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; - defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn_x2>; + defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun_x2>; // SVE2 signed/unsigned integer convert to floating-point - defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>; - defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>; - defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>; - defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, int_aarch64_sve_scvtfb>; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, int_aarch64_sve_scvtft>; + defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, int_aarch64_sve_ucvtfb>; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, int_aarch64_sve_ucvtft>; // SVE2 saturating shift right narrow by immediate and interleave defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 99836aeed7c0a..771c4c1fb2b6e 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -2558,7 +2558,7 @@ class sme2_cvt_vg2_single op, multiclass sme2_cvt_vg2_single op, ValueType out_vt, ValueType in_vt, SDPatternOperator intrinsic> { def NAME : sme2_cvt_vg2_single; - def : SVE2p1_Cvt_VG2_Pat; + def : SVE_Cvt_VG2_Pat; } // SME2 multi-vec FP8 down convert two registers diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 106986a64ffba..947ef7f9c9f6d 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -724,7 +724,7 @@ class SVE_Sat_Shift_VG2_Pat(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; -class SVE2p1_Cvt_VG2_Pat +class SVE_Cvt_VG2_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)), (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>; @@ -10174,7 +10174,7 @@ class sve2p1_multi_vec_extract_narrow opc, bits<3> tsz> multiclass sve2p1_multi_vec_extract_narrow opc, SDPatternOperator intrinsic> { def NAME : sve2p1_multi_vec_extract_narrow; - def : SVE2p1_Cvt_VG2_Pat; + def : SVE_Cvt_VG2_Pat; } // SVE2 multi-vec shift narrow @@ -11485,10 +11485,14 @@ class sve2_fp_to_int_downcvt { +multiclass sve2_fp_to_int_downcvt { def _HtoB : sve2_fp_to_int_downcvt; def _StoH : sve2_fp_to_int_downcvt; def _DtoS : sve2_fp_to_int_downcvt; + + def: SVE_Cvt_VG2_Pat; + def: SVE_Cvt_VG2_Pat; + def: SVE_Cvt_VG2_Pat; } //===----------------------------------------------------------------------===// @@ -11508,8 +11512,12 @@ class sve2_int_to_fp_upcvt U> { +multiclass sve2_int_to_fp_upcvt U, SDPatternOperator op> { def _BtoH : sve2_int_to_fp_upcvt; def _HtoS : sve2_int_to_fp_upcvt; def _StoD : sve2_int_to_fp_upcvt; + + def : SVE_1_Op_Pat(NAME # _BtoH)>; + def : SVE_1_Op_Pat(NAME # _HtoS)>; + def : SVE_1_Op_Pat(NAME # _StoD)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..1a07e4371810d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s +; +; SVCVTB (SCVTFB / UCVTFB) +; + +define @scvtfb_f16_i8( %zn) { +; CHECK-LABEL: scvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, z0.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8( %zn) + ret %res +} + +define @scvtfb_f32_i16( %zn) { +; CHECK-LABEL: scvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.s, z0.h +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16( %zn) + ret %res +} + +define @scvtfb_f64_i32( %zn) { +; CHECK-LABEL: scvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.d, z0.s +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32( %zn) + ret %res +} + +define @ucvtfb_f16_i8( %zn) { +; CHECK-LABEL: ucvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, z0.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8( %zn) + ret %res +} + +define @ucvtfb_f32_i16( %zn) { +; CHECK-LABEL: ucvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.s, z0.h +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16( %zn) + ret %res +} + +define @ucvtfb_f64_i32( %zn) { +; CHECK-LABEL: ucvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.d, z0.s +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32( %zn) + ret %res +} + +; +; SVCVTT (SCVTFLT / UCVTFLT) +; + +define @scvtflt_f16_i8( %zn) { +; CHECK-LABEL: scvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.h, z0.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8( %zn) + ret %res +} + +define @scvtflt_f32_i16( %zn) { +; CHECK-LABEL: scvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.s, z0.h +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16( %zn) + ret %res +} + +define @scvtflt_f64_i32( %zn) { +; CHECK-LABEL: scvtflt_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.d, z0.s +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32( %zn) + ret %res +} + +define @ucvtflt_f16_i8( %zn) { +; CHECK-LABEL: ucvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.h, z0.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8( %zn) + ret %res +} + +define @ucvtflt_f32_i16( %zn) { +; CHECK-LABEL: ucvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.s, z0.h +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16( %zn) + ret %res +} + +define @ucvtflt_f64_i32( %zn) { +; CHECK-LABEL: ucvtflt_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.d, z0.s +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32( %zn) + ret %res +} + diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll new file mode 100644 index 0000000000000..7e05793cabcc1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING +; +; FCVTZSN +; + +define @fcvtzsn_i8_f16( %zn1, %zn2) { +; CHECK-LABEL: fcvtzsn_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzsn_i8_f16: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzsn.x2.i8f16( %zn1, %zn2) + ret %res +} + +define @fcvtzsn_i16_f32( %zn1, %zn2) { +; CHECK-LABEL: fcvtzsn_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzsn_i16_f32: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzsn.x2.i16f32( %zn1, %zn2) + ret %res +} + +define @fcvtzsn_i32_f64( %zn1, %zn2) { +; CHECK-LABEL: fcvtzsn_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzsn_i32_f64: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzsn.x2.i32f64( %zn1, %zn2) + ret %res +} + +; +; FCVTZUN +; + +define @fcvtzun_i8_f16( %zn1, %zn2) { +; CHECK-LABEL: fcvtzun_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i8_f16: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzun.x2.i8f16( %zn1, %zn2) + ret %res +} + +define @fcvtzun_i16_f32( %zn1, %zn2) { +; CHECK-LABEL: fcvtzun_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i16_f32: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzun.x2.i16f32( %zn1, %zn2) + ret %res +} + +define @fcvtzun_i32_f64( %zn1, %zn2) { +; CHECK-LABEL: fcvtzun_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i32_f64: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; CHECK-STREAMING-NEXT: ret + %res = call @llvm.aarch64.sve.fcvtzun.x2.i32f64( %zn1, %zn2) + ret %res +} +