From ffd6aaf5b6663836700663ae0f9a2d80f4056689 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 25 Jun 2022 10:25:11 -0400
Subject: [PATCH] AMDGPU: Make packed 32-bit instructions rematerializable

---
Notes (placed after the "---" separator, so they are not part of the commit
message):
  * VOP3PInstructions.td: adds isReMaterializable = 1 to the
    HasPackedFP32Ops let-block. The hunk context shows V_PK_FMA_F32,
    V_PK_MUL_F32 and V_PK_ADD_F32 inside that block.
  * remat-vop.mir: adds five tests. The test_remat_* cases expect every
    value to be defined immediately before its use, with no spill code.
    test_no_remat_v_pk_fma_f32 uses V_PK_FMA_F32 without the nofpexcept
    flag and expects SI_SPILL_V64_SAVE / SI_SPILL_V64_RESTORE instead.

 llvm/lib/Target/AMDGPU/VOP3PInstructions.td |   2 +-
 llvm/test/CodeGen/AMDGPU/remat-vop.mir      | 138 ++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index af75691..2959eca 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -667,7 +667,7 @@ def MAIInstInfoTable : GenericTable {
   let PrimaryKeyName = "getMAIInstInfoHelper";
 }
 
-let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in {
+let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
   defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
   defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
   defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index b34afe6..001799c 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -5247,3 +5247,141 @@ body: |
     S_NOP 0, implicit %3
     S_ENDPGM 0, implicit %0
 ...
+
+---
+name: test_remat_v_pk_fma_f32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: test_remat_v_pk_fma_f32
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %2:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_no_remat_v_pk_fma_f32
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: test_no_remat_v_pk_fma_f32
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %2:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_mul_f32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: test_remat_v_pk_mul_f32
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %2:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_add_f32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: test_remat_v_pk_add_f32
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %2:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_mov_b32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: test_remat_v_pk_mov_b32
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 9, $vgpr0_vgpr1, 9, $vgpr0_vgpr1, 12, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 10, $vgpr0_vgpr1, 10, $vgpr0_vgpr1, 13, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vreg_64_align2 = V_PK_MOV_B32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $exec
+    %2:vreg_64_align2 = V_PK_MOV_B32 9, %0, 9, %0, 12, 0, 0, 0, 0, implicit $exec
+    %3:vreg_64_align2 = V_PK_MOV_B32 10, %0, 10, %0, 13, 0, 0, 0, 0, implicit $exec
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0, implicit %0
+...
-- 
2.7.4