AMDGPU: Materialize frame index before add
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 29 Nov 2016 19:20:48 +0000 (19:20 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 29 Nov 2016 19:20:48 +0000 (19:20 +0000)
It isn't generally safe to fold the frame index
directly into the operand since it will possibly
not be an inline immediate after it is expanded.

This surprisingly seems to produce better code, since
the FI doesn't prevent folding other immediate operands.

llvm-svn: 288185

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll

index 1d933da..936bb8b 100644 (file)
@@ -245,12 +245,17 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
+  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     .addImm(Offset);
+  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
+    .addFrameIndex(FrameIdx);
+
   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
     .addReg(UnusedCarry, RegState::Define | RegState::Dead)
     .addReg(OffsetReg, RegState::Kill)
-    .addFrameIndex(FrameIdx);
+    .addReg(FIReg);
 }
 
 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
index dc01f76..92c592b 100644 (file)
@@ -7,15 +7,16 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 
+; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
+
 ; FIXME: add 0?
-; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140
-; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
+; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]]
 
 ; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
 ; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
 
 ; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
-; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]]
 
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen