// SIInstrInfo constructor (shown here as a diff hunk: the '-' line is the old
// form, the '+' lines are the new form).
// It passes the AMDGPU::ADJCALLSTACKUP / AMDGPU::ADJCALLSTACKDOWN opcodes to
// the AMDGPUGenInstrInfo base and initializes the RI and ST members from the
// subtarget argument. The new form replaces the empty body with a call to
// SchedModel.init(&ST), so the SchedModel member is set up before
// getInstrLatency queries it via SchedModel.computeInstrLatency().
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- RI(ST), ST(ST) {}
+ RI(ST), ST(ST) {
+ SchedModel.init(&ST);
+}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
unsigned Lat = 0, Count = 0;
for (++I; I != E && I->isBundledWithPred(); ++I) {
++Count;
- Lat = std::max(Lat, getInstrLatency(ItinData, *I, PredCost));
+ Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
}
return Lat + Count - 1;
}
- return AMDGPUGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+ return SchedModel.computeInstrLatency(&MI);
}
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
private:
const SIRegisterInfo RI;
const GCNSubtarget &ST;
+ TargetSchedModel SchedModel;
// The inverse predicate should have the negative value.
enum BranchPredicate {
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v8, v6
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_max_i16 v6, v6, v7
-; GFX9-NEXT: global_load_short_d16 v7, v[2:3], off offset:4
+; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: global_load_short_d16 v8, v[0:1], off offset:4
+; GFX9-NEXT: global_load_short_d16 v9, v[2:3], off offset:4
+; GFX9-NEXT: v_pk_max_i16 v6, v6, v7
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_max_i16 v0, v8, v7
+; GFX9-NEXT: v_pk_max_i16 v0, v8, v9
; GFX9-NEXT: global_store_dword v[4:5], v6, off
; GFX9-NEXT: global_store_short v[4:5], v0, off offset:4
; GFX9-NEXT: s_endpgm
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3)
+ ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; GCN: $m0 = S_MOV_B32 0
; GCN: $vgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GCN: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {