From c65a9db43e17f0acdd39b76498d1c23e4a70f9a1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 17:22:21 +0000 Subject: [PATCH] AMDGPU: Fix missing immarg for mfma intrinsics llvm-svn: 366230 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 60 ++++++++++++++++++--------- llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll | 23 ++++++++++ 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 1cde3af..bad4216 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1675,83 +1675,103 @@ def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn; // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll index a72d1da..7609838 100644 --- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -674,3 +674,26 @@ define void @test_interp_p2_f16(float %arg0, float %arg1, i32 %arg2, i32 %arg3, ret void } + +declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32) +define void @test_mfma_f32_32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 %arg4, i32 %arg5) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg3 + ; CHECK-NEXT: %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3) + %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3) + store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg4 + ; CHECK-NEXT: %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3) + %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3) + store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg5 + ; CHECK-NEXT: %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5) + %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5) + store volatile <32 x i32> %val2, <32 x i32> addrspace(1)* undef + + ret void +} -- 2.7.4