switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_LOAD_DWORD_IMM:
case AMDGPU::GLOBAL_LOAD_DWORD:
case AMDGPU::FLAT_STORE_DWORD:
return 1;
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
case AMDGPU::FLAT_STORE_DWORDX3:
return 3;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
case AMDGPU::FLAT_STORE_DWORDX4:
return 4;
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
return 8;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return S_BUFFER_LOAD_IMM;
- // For the purposes of this optimization SGPR variants of buffer loads
- // are considered to be zero-offsetted SGPR_IMM loads.
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
- // For the purposes of this optimization SGPR variants of buffer loads
- // are considered to be zero-offsetted SGPR_IMM loads.
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
switch (Opc) {
default:
return Result;
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
Offset = 0;
} else {
int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
- Offset = OffsetIdx == -1 ? 0 : I->getOperand(OffsetIdx).getImm();
+ Offset = I->getOperand(OffsetIdx).getImm();
}
if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase));
if (CI.InstClass == S_BUFFER_LOAD_SGPR_IMM)
New.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset));
- // For convenience, when SGPR_IMM buffer loads are merged into a
- // zero-offset load, we generate its SGPR variant.
- if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::offset))
- New.addImm(MergedOffset);
+ New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
default:
return 0;
case 2:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
case 4:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
case 8:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
}
case S_LOAD_IMM:
switch (Width) {
---
# CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm
-# CHECK: S_BUFFER_LOAD_DWORDX4_SGPR %0, %1, 0 :: (dereferenceable invariant load (s128), align 4)
+# CHECK: S_BUFFER_LOAD_DWORDX4_SGPR_IMM %0, %1, 0, 0 :: (dereferenceable invariant load (s128), align 4)
name: merge_s_buffer_load_sgpr_imm
tracksRegLiveness: true
body: |
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32 = COPY $sgpr4
- %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %1:sreg_32, 0 :: (dereferenceable invariant load (s32))
+ %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
%3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32))
%4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s32))
%5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32))