[AMDGPU] copyPhysReg() for 16 bit SGPR subregs
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 15 Apr 2020 23:16:13 +0000 (16:16 -0700)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 17 Apr 2020 18:59:39 +0000 (11:59 -0700)
Differential Revision: https://reviews.llvm.org/D78255

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir [new file with mode: 0644]

index 92d0440..60569df 100644 (file)
@@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
-                              MCRegister SrcReg, bool KillSrc) {
+                              MCRegister SrcReg, bool KillSrc,
+                              const char *Msg = "illegal SGPR to VGPR copy") {
   MachineFunction *MF = MBB.getParent();
-  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
-                                        "illegal SGPR to VGPR copy",
-                                        DL, DS_Error);
+  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
   LLVMContext &C = MF->getFunction().getContext();
   C.diagnose(IllegalCopy);
 
@@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
+  if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
+      RC == &AMDGPU::SGPR_LO16RegClass) {
     assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
-           AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
-
-    bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
-    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
-    DestReg = RI.getMatchingSuperReg(DestReg,
-                                     DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                                     &AMDGPU::VGPR_32RegClass);
-    SrcReg = RI.getMatchingSuperReg(SrcReg,
-                                    SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                                    &AMDGPU::VGPR_32RegClass);
-
-    auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
+           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+           AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+
+    bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
+    bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+    bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
+                   RC == &AMDGPU::SGPR_LO16RegClass);
+    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
+                  AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+    const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
+                                                 : &AMDGPU::VGPR_32RegClass;
+    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+                                                 : &AMDGPU::VGPR_32RegClass;
+    MCRegister NewDestReg =
+      RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
+                             DstRC);
+    MCRegister NewSrcReg =
+      RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
+                             SrcRC);
+
+    if (IsSGPRDst) {
+      if (!IsSGPRSrc) {
+        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+        return;
+      }
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
+        .addReg(NewSrcReg, getKillRegState(KillSrc));
+      return;
+    }
+
+    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
+      if (!DstLow || !SrcLow) {
+        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+                          "Cannot use hi16 subreg on VI!");
+      }
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
+        .addReg(NewSrcReg, getKillRegState(KillSrc));
+      return;
+    }
+
+    auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
       .addImm(0) // src0_modifiers
-      .addReg(SrcReg)
+      .addReg(NewSrcReg)
       .addImm(0) // clamp
       .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
                      : AMDGPU::SDWA::SdwaSel::WORD_1)
       .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
       .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
                      : AMDGPU::SDWA::SdwaSel::WORD_1)
-      .addReg(DestReg, RegState::Implicit | RegState::Undef);
+      .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
     // First implicit operand is $exec.
     MIB->tieOperands(0, MIB->getNumOperands() - 1);
     return;
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
new file mode 100644 (file)
index 0000000..40bfd60
--- /dev/null
@@ -0,0 +1,31 @@
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr:
+# GCN: ; illegal copy v0.l to s1.l
+# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy
+name: lo_to_lo_illegal_vgpr_to_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr1_lo16 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GFX8: ; illegal copy s0.l to v1.h
+# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+# GFX8-ERR: error: <unknown>:0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI!
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_hi16 = COPY killed $sgpr0_lo16
+    S_ENDPGM 0
+...
index f5c507b..f5b7f11 100644 (file)
@@ -193,3 +193,14 @@ body:             |
     $vgpr2 = COPY killed $vgpr1
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
+# GCN: s_mov_b32 s1, s0
+name: lo_to_lo_sgpr_to_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $sgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
new file mode 100644 (file)
index 0000000..21fc79d
--- /dev/null
@@ -0,0 +1,26 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_lo_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_hi16 = COPY killed $sgpr0_lo16
+    S_ENDPGM 0
+...