AMDGPU/GlobalISel: Enable TableGen'd instruction selector
author Tom Stellard <tstellar@redhat.com>
Thu, 10 May 2018 20:53:06 +0000 (20:53 +0000)
committer Tom Stellard <tstellar@redhat.com>
Thu, 10 May 2018 20:53:06 +0000 (20:53 +0000)
Reviewers: arsenm, nhaehnle

Reviewed By: arsenm

Subscribers: kzhuravl, wdng, mgorny, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D45994

llvm-svn: 332039

llvm/lib/Target/AMDGPU/AMDGPUGISel.td [new file with mode: 0644]
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir [new file with mode: 0644]

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
new file mode 100644 (file)
index 0000000..74b73de
--- /dev/null
@@ -0,0 +1,42 @@
+//===-- AMDGPUGISel.td - AMDGPU GlobalISel Patterns---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains patterns that should only be used by GlobalISel.  For
+// example, patterns for V_* instructions that have S_* equivalents.
+// SelectionDAG does not support selecting V_* instructions.
+//===----------------------------------------------------------------------===//
+
+include "AMDGPU.td"
+
+def sd_vsrc0 : ComplexPattern<i32, 1, "">;
+def gi_vsrc0 :
+    GIComplexOperandMatcher<s32, "selectVSRC0">,
+    GIComplexPatternEquiv<sd_vsrc0>;
+
+// Pattern for a two-source instruction whose sources are both SReg_32:
+// matches `node` applied to $src0 and $src1 and emits `inst` on the same two
+// operands.  src0_vt defaults to dst_vt, and src1_vt defaults to src0_vt.
+class GISelSop2Pat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt SReg_32:$src0), (src1_vt SReg_32:$src1))),
+  (inst src0_vt:$src0, src1_vt:$src1)
+>;
+
+// Pattern for a two-source instruction whose $src0 is matched through the
+// sd_vsrc0 complex pattern and whose $src1 is a VGPR_32: matches `node`
+// applied to the two operands and emits `inst` on them.  src0_vt defaults to
+// dst_vt, and src1_vt defaults to src0_vt.
+class GISelVop2Pat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
+  (inst src0_vt:$src0, src1_vt:$src1)
+>;
+
+def : GISelSop2Pat <or, S_OR_B32, i32>;
+def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
index 7cb6ef0..42d91c0 100644 (file)
@@ -17,6 +17,9 @@
 #include "AMDGPURegisterBankInfo.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
 using namespace llvm;
 
+#define GET_GLOBALISEL_IMPL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+/// Construct the selector from the subtarget, its register bank info and the
+/// target machine.  Besides the hand-written members, the initializer list
+/// pulls in the GET_GLOBALISEL_PREDICATES_INIT and
+/// GET_GLOBALISEL_TEMPORARIES_INIT sections of AMDGPUGenGlobalISel.inc.
 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
-    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
+    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
+    const AMDGPUTargetMachine &TM)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
+      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
+      STI(STI),
+      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+      ,AMDGPUASI(STI.getAMDGPUAS())
+{
+}
+
+/// Return the selector's name, which is this file's DEBUG_TYPE string.
+const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 
 MachineOperand
 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
@@ -416,6 +437,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   switch (I.getOpcode()) {
   default:
     break;
+  case TargetOpcode::G_OR:
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_ADD:
     return selectG_ADD(I);
   case TargetOpcode::G_CONSTANT:
@@ -429,3 +452,13 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   }
   return false;
 }
+
+/// Complex operand matcher that accepts \p Root unconditionally and renders it
+/// unchanged onto the instruction being built.  This lets one TableGen pattern
+/// take either an SGPR or a VGPR operand instead of needing an extra pattern.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
+  }};
+}
index 715c488..a8c5a15 100644 (file)
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 
+// Pull in the GET_GLOBALISEL_PREDICATE_BITSET section of the TableGen'd
+// selector.
+// NOTE(review): this is an anonymous namespace in a header, so every
+// translation unit that includes it gets its own copy of whatever the section
+// declares -- confirm this is intended.
+namespace {
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+}
+
 namespace llvm {
 
 class AMDGPUInstrInfo;
 class AMDGPURegisterBankInfo;
+class AMDGPUSubtarget;
 class MachineInstr;
 class MachineOperand;
 class MachineRegisterInfo;
@@ -33,9 +40,11 @@ class SISubtarget;
 class AMDGPUInstructionSelector : public InstructionSelector {
 public:
   AMDGPUInstructionSelector(const SISubtarget &STI,
-                            const AMDGPURegisterBankInfo &RBI);
+                            const AMDGPURegisterBankInfo &RBI,
+                            const AMDGPUTargetMachine &TM);
 
   bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+  static const char *getName();
 
 private:
   struct GEPInfo {
@@ -46,6 +55,9 @@ private:
     GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
   };
 
+  /// tblgen-erated 'select' implementation.
+  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
   MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_ADD(MachineInstr &I) const;
@@ -57,9 +69,23 @@ private:
   bool selectG_LOAD(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectVSRC0(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
+  const AMDGPUTargetMachine &TM;
+  const SISubtarget &STI;
+  bool EnableLateStructurizeCFG;
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+
 protected:
   AMDGPUAS AMDGPUASI;
 };
index 850715f..01ff028 100644 (file)
@@ -381,7 +381,7 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 
   RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
   InstSelector.reset(new AMDGPUInstructionSelector(
-      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
+      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
 }
 
 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
index 598ec36..e3f0f3e 100644 (file)
@@ -15,6 +15,9 @@ tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
 tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables)
 tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
 
+set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
+tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
+
 add_public_tablegen_target(AMDGPUCommonTableGen)
 
 add_llvm_target(AMDGPUCodeGen
index f6e2fbc..b30aa31 100644 (file)
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIRegisterInfo.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
@@ -1562,3 +1563,23 @@ const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
     return Empty;
   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
 }
+
+/// Pick the register class for the register of \p MO from its size in bits
+/// and the register bank \p MRI reports for it.
+///
+/// \returns nullptr if MRI.getRegBankOrNull() yields no bank.  Otherwise, for
+/// 32-bit registers, VGPR_32 on the VGPR bank and SReg_32_XM0 on any other
+/// bank; for 64-bit registers, VReg_64 on the VGPR bank and SReg_64_XEXEC on
+/// any other bank.  Other sizes are not implemented and reach
+/// llvm_unreachable.
+const TargetRegisterClass *
+SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+                                         const MachineRegisterInfo &MRI) const {
+  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
+  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+  if (!RB)
+    return nullptr;
+
+  switch (Size) {
+  case 32:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+                                                  &AMDGPU::SReg_32_XM0RegClass;
+  case 64:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
+                                                   &AMDGPU::SReg_64_XEXECRegClass;
+  default:
+    llvm_unreachable("not implemented");
+  }
+}
index 1775c94..f7f0af5 100644 (file)
@@ -227,6 +227,9 @@ public:
     // Not a callee saved register.
     return AMDGPU::SGPR30_SGPR31;
   }
+  const TargetRegisterClass *
+  getConstrainedRegClassForOperand(const MachineOperand &MO,
+                                 const MachineRegisterInfo &MRI) const override;
 
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
new file mode 100644 (file)
index 0000000..60e1a4c
--- /dev/null
@@ -0,0 +1,45 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @or(i32 addrspace(1)* %global0) {ret void}
+...
+---
+
+name:            or
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: or
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
+    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:vgpr(s32) = COPY $vgpr0
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_CONSTANT i32 4096
+
+    ; or ss
+    ; GCN: [[SS:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[SGPR0]], [[SGPR1]]
+    %6:sgpr(s32) = G_OR %0, %1
+
+    ; or vs
+    ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SS]], [[VGPR0]]
+    %7:vgpr(s32) = G_OR %2, %6
+
+    ; or sv
+    ; GCN: [[SV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SS]], [[VS]]
+    %8:vgpr(s32) = G_OR %6, %7
+
+    ; or vv
+    ; GCN: [[VV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SV]], [[VGPR0]]
+    %9:vgpr(s32) = G_OR %8, %2
+
+    G_STORE %9, %3 :: (store 4 into %ir.global0)
+
+...
+---