unsigned Opcode;
const int16_t *SubIndices;
- if (AMDGPU::M0 == DestReg) {
- // Check if M0 isn't already set to this value
- for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
- I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
-
- if (!I->definesRegister(AMDGPU::M0))
- continue;
-
- unsigned Opc = I->getOpcode();
- if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
- break;
-
- if (!I->readsRegister(SrcReg))
- break;
-
- // The copy isn't necessary
- return;
- }
- }
-
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
/********** Interpolation Patterns **********/
/********** ====================== **********/
+// The value of $params is constant throughout the entire kernel.
+// We need to use S_MOV_B32 $params, because CSE ignores copies, so
+// without it we end up with a lot of redundant moves.
+
+// Selects int_SI_fs_constant to V_INTERP_MOV_F32 with INTERP.P0 as the first
+// operand, forwarding the $attr_chan and $attr immediates unchanged.
+// $params is wrapped in an explicit S_MOV_B32 instead of being passed directly.
def : Pat <
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
+// Selects int_SI_fs_interp to a nested V_INTERP_P1_F32 / V_INTERP_P2_F32 pair:
+// P1 takes sub0 of $ij, and P2 takes the P1 result together with sub1 of $ij.
+// Both instructions receive the same $attr_chan and $attr immediates and each
+// gets its own (S_MOV_B32 $params) operand. $params is matched as a plain i32
+// rather than as M0Reg.
def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
- imm:$attr_chan, imm:$attr, i32:$params),
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
(EXTRACT_SUBREG $ij, sub1),
- imm:$attr_chan, imm:$attr, $params)
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
/********** ================== **********/