[AMDGPU][GFX908] IndirectCopyToAGPR: Confirm modified register is dst reg of accvgpr_write
author: Jeffrey Byrnes <JeffreyByrnes@amd.com>
Thu, 4 May 2023 16:57:15 +0000 (09:57 -0700)
committer: Jeffrey Byrnes <Jeffrey.Byrnes@amd.com>
Fri, 12 May 2023 19:38:29 +0000 (12:38 -0700)
indirectCopyToAGPR should be reworked so as to avoid optimizing during copy lowering. However, as it stands, the code is buggy. This patch replaces the call to definesRegister with modifiesRegister, and confirms that the dest reg of the found accvgpr_write is in fact the src reg of our copy.

Differential Revision: https://reviews.llvm.org/D149873

Change-Id: Id8a61659ac15565dcb970069d0624f0925a46e6d

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir

index a6e11e3194e465f6a566c280451191a67d455811..4f695a88027a692c4b185e7a7dae0b5a55142ffb 100644 (file)
@@ -578,9 +578,12 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
   if (!RegsOverlap) {
     for (auto Def = MI, E = MBB.begin(); Def != E; ) {
       --Def;
-      if (!Def->definesRegister(SrcReg, &RI))
+
+      if (!Def->modifiesRegister(SrcReg, &RI))
         continue;
-      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
+
+      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
+          Def->getOperand(0).getReg() != SrcReg)
         break;
 
       MachineOperand &DefOp = Def->getOperand(1);
index 7607ec055e7da327db5099f7ca40d016ce0dba2b..339c6e34bbcff888691b39af8fad9e2af3b2b98d 100644 (file)
@@ -900,9 +900,11 @@ body:             |
     ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
     ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
     ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
-    ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
-    ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
-    ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
+    ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2
+    ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr3_agpr4
+    ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr1_agpr2
+    ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit $exec
     ; GFX90A-LABEL: name: a2_to_a2_implicit_defs
     ; GFX90A: liveins: $agpr0_agpr1
     ; GFX90A-NEXT: {{  $}}