If amdgpu-unsafe-fp-atomics is specified, allow {flat|global}_atomic_add_f32 even if atomic modes don't match.
Differential Revision: https://reviews.llvm.org/D95391
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::GLOBAL_ADDRESS && Subtarget->hasAtomicFaddInsts()) {
- if (!fpModeMatchesGlobalFPAtomicMode(RMW))
+ if (!fpModeMatchesGlobalFPAtomicMode(RMW) ||
+ RMW->getFunction()->getFnAttribute("amdgpu-unsafe-fp-atomics")
+ .getValueAsString() != "true")
return AtomicExpansionKind::CmpXChg;
return RMW->use_empty() ? AtomicExpansionKind::None :
ret void
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign"}
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
ret double %res
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }