case Intrinsic::amdgcn_s_barrier:
return selectSBarrier(I);
case Intrinsic::amdgcn_global_atomic_fadd:
- return selectGlobalAtomicFaddIntrinsic(I);
+ // For the intrinsic form, operand 2 is the address and operand 3 is the
+ // data; forward them to the shared global-fadd selector.
+ return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3));
default: {
return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
MachineInstr &I) const {
+ // G_ATOMICRMW_FADD on a global-address-space pointer is routed to the
+ // dedicated global-fadd selector; operand 1 is the pointer, operand 2 the
+ // data value.
+ if (I.getOpcode() == TargetOpcode::G_ATOMICRMW_FADD) {
+ const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
+ return selectGlobalAtomicFadd(I, I.getOperand(1), I.getOperand(2));
+ }
+
+ // All other cases: set up M0 first, then defer to the generated selector.
initM0(I);
return selectImpl(I, *CoverageInfo);
}
return true;
}
+ // Shared selector for global-memory FP atomic adds. AddrOp/DataOp are passed
+ // in explicitly because the intrinsic form and the G_ATOMICRMW_FADD form
+ // carry the address and data at different operand positions.
- bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
- MachineInstr &MI) const{
+ bool AMDGPUInstructionSelector::selectGlobalAtomicFadd(
+ MachineInstr &MI, MachineOperand &AddrOp, MachineOperand &DataOp) const {
- if (STI.hasGFX90AInsts())
+ if (STI.hasGFX90AInsts()) {
+ // gfx90a adds return versions of the global atomic fadd instructions so no
+ // special handling is required.
return selectImpl(MI, *CoverageInfo);
+ }
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
// FIXME: This is only needed because tablegen requires number of dst operands
// in match and replace pattern to be the same. Otherwise patterns can be
// exported from SDag path.
- auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+ auto Addr = selectFlatOffsetImpl<true>(AddrOp);
- Register Data = MI.getOperand(3).getReg();
+ Register Data = DataOp.getReg();
+ // Vector data selects the packed-half add; otherwise the scalar f32 add.
const unsigned Opc = MRI->getType(Data).isVector() ?
AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
- bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
+ // Selects a global FP atomic add. AddrOp and DataOp are passed explicitly
+ // so both the amdgcn_global_atomic_fadd intrinsic and G_ATOMICRMW_FADD
+ // (whose operand layouts differ) can share one implementation.
+ bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
+ MachineOperand &DataOp) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
+ // The builder is created unconditionally (hoisted out of the LDS check) so
+ // the global-address-space rule below can extend the same rule set.
+ auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
if (ST.hasLDSFPAtomics()) {
- auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
- .legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
+ Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
}
+ // Targets with global fadd instructions also accept f32 adds on global
+ // pointers.
+ if (ST.hasAtomicFaddInsts())
+ Atomic.legalFor({{S32, GlobalPtr}});
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
// demarshalling
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -O0 -run-pass=legalizer %s -o - | FileCheck %s
+
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_fadd_global_i32)
+
+# On gfx908 and gfx90a the legalizer keeps G_ATOMICRMW_FADD on a p1 (global)
+# pointer unchanged (see the CHECK lines below); fiji cannot legalize it and
+# emits the remark matched by ERR above.
+---
+name: atomicrmw_fadd_global_i32
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 1)
+...