}
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
SDPatternOperator atomic_rtn = null_frag,
SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> :
- FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
- FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
-
+ RegisterClass data_rc = vdst_rc> {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
+ defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
+ }
+}
//===----------------------------------------------------------------------===//
// Flat Instructions
} // End SubtargetPredicate = isGFX7GFX10
-let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
VReg_64, i64, atomic_dec_global_64>;
} // End is_flat_global = 1
-} // End SubtargetPredicate = HasFlatGlobalInsts
let SubtargetPredicate = HasFlatScratchInsts in {
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
ret void
}
+
+; Make sure this still selects even though the subtarget (gfx803) is wrong,
+; because the required feature (+atomic-fadd-insts) is explicitly enabled.
+; GCN-LABEL: {{^}}global_atomic_fadd_ret_f32_wrong_subtarget:
+define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 {
+  ; seq_cst atomic fadd whose result IS used (stored to undef), exercising the
+  ; returning selection path. #0 sets target-cpu=gfx803 with +atomic-fadd-insts forced on.
+  %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
+  store float %result, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_wrong_subtarget:
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 {
+  ; Same atomic fadd but the result is discarded, exercising the no-return
+  ; selection path. #0 sets target-cpu=gfx803 with +atomic-fadd-insts forced on.
+  %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
+  ret void
+}
+
+attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
ret void
}
+
+; Make sure this still selects even though the subtarget (gfx803) is wrong,
+; because the required feature (+atomic-fadd-insts) is explicitly enabled.
+; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
+; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off
+define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
+  ; Calls the target intrinsic directly (not atomicrmw); the GCN check line above
+  ; expects global_atomic_add_f32 to be selected despite the gfx803 target-cpu,
+  ; because attribute set #0 force-enables the +atomic-fadd-insts feature.
+  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data)
+  ret void
+}
+
+attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }