Opcode == AMDGPU::DS_GWS_BARRIER;
}
+static bool modifiesModeRegister(const MachineInstr &MI) {
+  // Skip the full operand and register alias search that modifiesRegister
+  // does. Only a handful of instructions touch MODE; it is only ever an
+  // implicit def and doesn't alias any other registers.
+  if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
+    for (; ImpDef && *ImpDef; ++ImpDef) {
+      if (*ImpDef == AMDGPU::MODE)
+        return true;
+    }
+  }
+
+  return false;
+}
+
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
+  // A mode change is a scalar operation that influences vector instructions.
+  if (modifiesModeRegister(MI))
+    return true;
+
// These are like SALU instructions in terms of effects, so it's questionable
// whether we should return true for those.
//
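For comparison, the generic way to ask the same question is MachineInstr::modifiesRegister, which walks every operand of the instruction and every alias of the queried register. A minimal sketch of that slower path, assuming the headers already pulled in by SIInstrInfo.cpp; the function name and the explicit SIRegisterInfo parameter are illustrative and not part of the patch:

  // Equivalent but slower: scans all explicit and implicit operands of MI and
  // every alias of AMDGPU::MODE through the TargetRegisterInfo tables.
  static bool modifiesModeRegisterGeneric(const MachineInstr &MI,
                                          const SIRegisterInfo &TRI) {
    return MI.modifiesRegister(AMDGPU::MODE, &TRI);
  }

Because MODE has no aliases and only ever appears as an implicit def, checking the static implicit-def list in the MCInstrDesc gives the same answer without the per-operand walk.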
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+# Make sure mandatory skips are not removed around mode defs.
+# FIXME: -amdgpu-skip-threshold seems to be backwards.
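+#
+# In the setreg_imm32 test, 2177 is the raw s_setreg hwreg() immediate:
+# id | (offset << 6) | ((size - 1) << 11) = 1 | (2 << 6) | (1 << 11) = 2177,
+# i.e. hwreg(HW_REG_MODE, 2, 2), the FP64/FP16 rounding-mode field of $mode.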
+
+---
+
+name: need_skip_setreg_imm32_b32
+body: |
+ ; CHECK-LABEL: name: need_skip_setreg_imm32_b32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+ ; CHECK: bb.2:
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+
+name: need_skip_setreg_b32
+body: |
+ ; CHECK-LABEL: name: need_skip_setreg_b32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
+ ; CHECK: bb.2:
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ liveins: $sgpr0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+
+name: need_skip_denorm_mode
+body: |
+ ; CHECK-LABEL: name: need_skip_denorm_mode
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_DENORM_MODE 3, implicit-def $mode, implicit $mode
+ ; CHECK: bb.2:
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_DENORM_MODE 3, implicit-def $mode, implicit $mode
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+
+name: need_skip_round_mode
+body: |
+ ; CHECK-LABEL: name: need_skip_round_mode
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_ROUND_MODE 3, implicit-def $mode, implicit $mode
+ ; CHECK: bb.2:
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_ROUND_MODE 3, implicit-def $mode, implicit $mode
+
+ bb.2:
+ S_ENDPGM 0
+...
ret void
}
+; CHECK-LABEL: {{^}}skip_mode_switch:
+; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: s_cbranch_execz
+; CHECK: s_setreg_imm32
+; CHECK: s_or_b64 exec, exec
+define void @skip_mode_switch(i32 %arg) {
+entry:
+ %cmp = icmp eq i32 %arg, 0
+ br i1 %cmp, label %bb.0, label %bb.1
+
+bb.0:
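+  ; 2049 = hwreg(HW_REG_MODE, 0, 2) = 1 | (0 << 6) | ((2 - 1) << 11); this sets
+  ; the wave's FP32 rounding-mode field of the mode register to 3.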
+ call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
+ br label %bb.1
+
+bb.1:
+ ret void
+}
+
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }