return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}
-static bool getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+/// Returns true when \p Opcode is AMDGPU::S_RFE_B64, false for every other
+/// opcode.
+static bool isRFE(unsigned Opcode) {
+  switch (Opcode) {
+  case AMDGPU::S_RFE_B64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
return NoopHazard;
+ if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
if (isSSetReg(MI->getOpcode()))
return std::max(0, checkSetRegHazards(MI));
+ if (isRFE(MI->getOpcode()))
+ return std::max(0, checkRFEHazards(MI));
+
return 0;
}
int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
return RWLaneWaitStates - WaitStatesSince;
}
+
+/// Computes the wait-state deficit for an RFE instruction relative to the
+/// nearest set-reg instruction that targets the TRAPSTS hardware register.
+///
+/// \param RFE the instruction under inspection. It is not examined directly;
+///        the distance is obtained from getWaitStatesSinceSetReg.
+/// \returns 0 on subtargets older than VOLCANIC_ISLANDS. Otherwise the
+///          required separation (1) minus the value reported by
+///          getWaitStatesSinceSetReg. The result may be zero or negative;
+///          callers either test it with "> 0" or clamp it with std::max(0, ..).
+int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
+  // The hazard is only modelled from VOLCANIC_ISLANDS onwards.
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    return 0;
+
+  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
+
+  // Predicate handed to the scan: does this instruction address TRAPSTS?
+  auto WritesTrapSts = [InstrInfo] (MachineInstr *SetReg) {
+    return getHWReg(InstrInfo, *SetReg) == AMDGPU::Hwreg::ID_TRAPSTS;
+  };
+
+  // Number of wait states that must separate the matching set-reg and the RFE.
+  const int RFEWaitStates = 1;
+
+  // Named "Since" (not "Needed") to match what the helper reports and the
+  // naming used by the sibling hazard checks.
+  int WaitStatesSince = getWaitStatesSinceSetReg(WritesTrapSts);
+  return RFEWaitStates - WaitStatesSince;
+}
int createsVALUHazard(const MachineInstr &MI);
int checkVALUHazards(MachineInstr *VALU);
int checkRWLaneHazards(MachineInstr *RWLane);
+ int checkRFEHazards(MachineInstr *RFE);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
define void @s_setreg() { ret void }
define void @vmem_gt_8dw_store() { ret void }
define void @readwrite_lane() { ret void }
+ define void @rfe() { ret void }
...
---
# GCN-LABEL: name: div_fmas
S_ENDPGM
...
+
+...
+---
+
+# Test case for the RFE hazard check. Each block pairs an S_SETREG_B32 with an
+# S_RFE_B64 that immediately follows it.
+# GCN-LABEL: name: rfe
+
+# In bb.0 the set-reg immediate is 3 (presumably the TRAPSTS hardware register
+# id; confirm against AMDGPU::Hwreg). Under the VI check prefix an S_NOP is
+# expected between the two instructions.
+# GCN-LABEL: bb.0:
+# GCN: S_SETREG
+# VI: S_NOP
+# GCN-NEXT: S_RFE_B64
+
+# In bb.1 the set-reg immediate is 0, so the RFE is expected directly after the
+# set-reg with nothing inserted.
+# GCN-LABEL: bb.1:
+# GCN: S_SETREG
+# GCN-NEXT: S_RFE_B64
+
+name: rfe
+
+body: |
+ bb.0:
+ successors: %bb.1
+ S_SETREG_B32 %sgpr0, 3
+ S_RFE_B64 %sgpr2_sgpr3
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_SETREG_B32 %sgpr0, 0
+ S_RFE_B64 %sgpr2_sgpr3
+ S_ENDPGM
+
+...