AMDGPU: Force skip over s_sendmsg and exp instructions

author Nicolai Haehnle <nhaehnle@gmail.com>

Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)

committer Nicolai Haehnle <nhaehnle@gmail.com>

Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)
author Nicolai Haehnle <nhaehnle@gmail.com>
Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp

index 61c8f359e1689513aefee9b1c8947d2e0bd8da8d..dc9397cf7b85e900ad6b308ee35788d7607d2915 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
            I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
          return true;
  
-      // V_READFIRSTLANE/V_READLANE destination register may be used as operand
-      // by some SALU instruction. If exec mask is zero vector instruction
-      // defining the register that is used by the scalar one is not executed
-      // and scalar instruction will operate on undefined data. For
-      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
-      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
-          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
          return true;
-      }
-
-      if (I->isInlineAsm()) {
-        const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
-        const char *AsmStr = I->getOperand(0).getSymbolName();
-
-        // inlineasm length estimate is number of bytes assuming the longest
-        // instruction.
-        uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
-        NumInstr += MaxAsmSize / MAI->getMaxInstLength();
-      } else {
-        ++NumInstr;
-      }
  
+      ++NumInstr;
        if (NumInstr >= SkipThreshold)
          return true;
      }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index 6c85c92454c39801f35c5ad319cc4cc4296b7d64..f3745382a6f4bfbf33bf5dce44e13bdcbad0919e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
           changesVGPRIndexingMode(MI);
  }
  
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+  unsigned Opcode = MI.getOpcode();
+
+  if (MI.mayStore() && isSMRD(MI))
+    return true; // scalar store or atomic
+
+  // These instructions cause shader I/O that may cause hardware lockups
+  // when executed with an empty EXEC mask.
+  //
+  // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+  //       EXEC = 0, but checking for that case here seems not worth it
+  //       given the typical code patterns.
+  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+    return true;
+
+  if (MI.isInlineAsm())
+    return true; // conservative assumption
+
+  // These are like SALU instructions in terms of effects, so it's questionable
+  // whether we should return true for those.
+  //
+  // However, executing them with EXEC = 0 causes them to operate on undefined
+  // data, which we avoid by returning true here.
+  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+    return true;
+
+  return false;
+}
+
  bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
    switch (Imm.getBitWidth()) {
    case 32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h

index 0a735257d34e4c43076455d88385e97c72c22e8f..d681b926504ede2d3495ac5a30e793bf0eb628ea 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
      return !RI.isSGPRReg(MRI, Dest);
    }
  
+  /// Whether we must prevent this instruction from executing with EXEC = 0.
+  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
    bool isInlineConstant(const APInt &Imm) const;
  
    bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll

index 6d2de108829d3386af86f2f3016a218b78d6ac81..cdfe9b460a01689b8e85beea0255b71a0f634d0b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -480,5 +480,65 @@ define amdgpu_kernel void @test_export_vm_i32() #0 {
    ret void
  }
  
+; GCN-LABEL: {{^}}test_if_export_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
+  br label %end
+
+end:
+  ret void
+}
+
  attributes #0 = { nounwind }
  attributes #1 = { nounwind inaccessiblememonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll

index 594f7604879061d5b23346ad06c5f113d173ab94..4f8cd6f682e68f8bc909cfea60a82a5e0dbdff1e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -136,6 +136,21 @@ body:
    ret void
  }
  
+; GCN-LABEL: {{^}}if_sendmsg:
+; GCN: s_cbranch_execz
+; GCN: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define amdgpu_gs void @if_sendmsg(i32 %flag) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %sendmsg, label %end
+
+sendmsg:
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
+  br label %end
+
+end:
+  ret void
+}
+
  declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
  declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
  
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

index 49c171e03de29b1a9690218044f264f2b5853d34..42a28b9527391240b33604c5932e5d02132618b4 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -72,10 +72,18 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
  ; CHECK-NEXT: ; %bb.0:
  ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: s_cbranch_execnz BB6_2
  ; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: exp
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB6_2:
  ; CHECK: v_mov_b32_e64 v7, -1
  ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
-; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: s_cbranch_execnz BB6_4
+; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT: exp
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB6_4:
  ; CHECK-NEXT: s_endpgm
  define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
    call void @llvm.AMDGPU.kill(float %x)
author	Nicolai Haehnle <nhaehnle@gmail.com>
	Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)
committer	Nicolai Haehnle <nhaehnle@gmail.com>
	Mon, 30 Jul 2018 09:23:59 +0000 (09:23 +0000)
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.h		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll		patch \| blob \| history