[AMDGPU] Fold copy (copy vgpr)
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 27 Sep 2018 18:55:20 +0000 (18:55 +0000)
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 27 Sep 2018 18:55:20 +0000 (18:55 +0000)
This allows reducing the number of VGPRs used in some cases.

Differential Revision: https://reviews.llvm.org/D52577

llvm-svn: 343249

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir [new file with mode: 0644]

index d4b64ab..bd0bc73 100644 (file)
@@ -477,6 +477,20 @@ void SIFoldOperands::foldOperand(
     UseMI->setDesc(TII->get(MovOp));
     CopiesToReplace.push_back(UseMI);
   } else {
+    if (UseMI->isCopy() && OpToFold.isReg() &&
+        TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+        TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
+        TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+        TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+        !UseMI->getOperand(1).getSubReg()) {
+      UseMI->getOperand(1).setReg(OpToFold.getReg());
+      UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+      UseMI->getOperand(1).setIsKill(false);
+      CopiesToReplace.push_back(UseMI);
+      OpToFold.setIsKill(false);
+      return;
+    }
+
     const MCInstrDesc &UseDesc = UseMI->getDesc();
 
     // Don't fold into target independent nodes.  Target independent opcodes
diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
new file mode 100644 (file)
index 0000000..7d1c75c
--- /dev/null
@@ -0,0 +1,27 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: fold_vgpr_copy
+# GCN:      %0:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %4:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %3:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE2_B32_gfx9 %0.sub0, killed %4, killed %3, 0, 1, 0, implicit $exec
+
+---
+name:            fold_vgpr_copy
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %4 = IMPLICIT_DEF
+    %3 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub0
+    %2:vgpr_32 = COPY %1
+    DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec
+
+...