ret void
}
+define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0 { ; %arg lives in addrspace(4) and points at a generic (addrspace 0) float pointer
+; GFX900-LABEL: infer_as_before_atomic:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-NEXT: s_mov_b64 s[2:3], 0
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v1, s4
+; GFX900-NEXT: BB9_1: ; %atomicrmw.start
+; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX900-NEXT: s_cbranch_execnz BB9_1
+; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX900-NEXT: s_endpgm
+;
+; GFX908-LABEL: infer_as_before_atomic:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX908-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: global_atomic_add_f32 v0, v1, s[0:1]
+; GFX908-NEXT: s_endpgm
+;
+; GFX90A-LABEL: infer_as_before_atomic:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: global_atomic_add_f32 v0, v1, s[0:1]
+; GFX90A-NEXT: s_endpgm
+;
+; GFX10-LABEL: infer_as_before_atomic:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v1, s2
+; GFX10-NEXT: s_mov_b32 s2, 0
+; GFX10-NEXT: BB9_1: ; %atomicrmw.start
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: v_add_f32_e32 v0, 1.0, v1
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, v0
+; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
+; GFX10-NEXT: s_cbranch_execnz BB9_1
+; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX10-NEXT: s_endpgm
+ %load = load float*, float* addrspace(4)* %arg ; fetch the generic float pointer that the atomic below operates on
+ %v = atomicrmw fadd float* %load, float 1.0 syncscope("agent-one-as") monotonic, align 4 ; checks above expect global_atomic_add_f32 on GFX908 and GFX90A, and a global_atomic_cmpswap retry loop on GFX900 and GFX10; %v itself is unused
+ ret void
+}
+
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" } ; group used by @infer_as_before_atomic: f32 denormal mode preserve-sign, unsafe FP atomics enabled
attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" } ; same as #0 but pins target-cpu gfx803 and adds the atomic-fadd-insts feature
attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" } ; unsafe FP atomics only, no denormal-mode override
; GCN-O0-NEXT: Fix function bitcasts for AMDGPU
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Early propagate attributes from kernels to functions
-; GCN-O0-NEXT: Expand Atomic instructions
; GCN-O0-NEXT: AMDGPU Lower Intrinsics
; GCN-O0-NEXT: AMDGPU Inline All Functions
; GCN-O0-NEXT: CallGraph Construction
; GCN-O0-NEXT: Lower OpenCL enqueued blocks
; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O0-NEXT: FunctionPass Manager
+; GCN-O0-NEXT: Expand Atomic instructions
; GCN-O0-NEXT: Lower Garbage Collection Instructions
; GCN-O0-NEXT: Shadow Stack GC Lowering
; GCN-O0-NEXT: Lower constant intrinsics
; GCN-O1-NEXT: Fix function bitcasts for AMDGPU
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Early propagate attributes from kernels to functions
-; GCN-O1-NEXT: Expand Atomic instructions
; GCN-O1-NEXT: AMDGPU Lower Intrinsics
; GCN-O1-NEXT: AMDGPU Inline All Functions
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Infer address spaces
+; GCN-O1-NEXT: Expand Atomic instructions
; GCN-O1-NEXT: AMDGPU Promote Alloca
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: SROA
; GCN-O1-OPTS-NEXT: Fix function bitcasts for AMDGPU
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions
-; GCN-O1-OPTS-NEXT: Expand Atomic instructions
; GCN-O1-OPTS-NEXT: AMDGPU Lower Intrinsics
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
; GCN-O1-OPTS-NEXT: CallGraph Construction
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Infer address spaces
+; GCN-O1-OPTS-NEXT: Expand Atomic instructions
; GCN-O1-OPTS-NEXT: AMDGPU Promote Alloca
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: SROA
; GCN-O2-NEXT: Fix function bitcasts for AMDGPU
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Early propagate attributes from kernels to functions
-; GCN-O2-NEXT: Expand Atomic instructions
; GCN-O2-NEXT: AMDGPU Lower Intrinsics
; GCN-O2-NEXT: AMDGPU Inline All Functions
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Infer address spaces
+; GCN-O2-NEXT: Expand Atomic instructions
; GCN-O2-NEXT: AMDGPU Promote Alloca
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: SROA
; GCN-O3-NEXT: Fix function bitcasts for AMDGPU
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Early propagate attributes from kernels to functions
-; GCN-O3-NEXT: Expand Atomic instructions
; GCN-O3-NEXT: AMDGPU Lower Intrinsics
; GCN-O3-NEXT: AMDGPU Inline All Functions
; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Infer address spaces
+; GCN-O3-NEXT: Expand Atomic instructions
; GCN-O3-NEXT: AMDGPU Promote Alloca
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: SROA