From 8dce4333f92276de566bd425006168b1bf905a7b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 5 Dec 2022 11:45:17 -0500 Subject: [PATCH] AMDGPU: Bulk update memory legalizer tests to use opaque pointers --- .../CodeGen/AMDGPU/memory-legalizer-flat-agent.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-flat-nontemporal.ll | 28 +- .../AMDGPU/memory-legalizer-flat-singlethread.ll | 576 ++++++++++----------- .../CodeGen/AMDGPU/memory-legalizer-flat-system.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-flat-volatile.ll | 38 +- .../AMDGPU/memory-legalizer-flat-wavefront.ll | 568 ++++++++++---------- .../AMDGPU/memory-legalizer-flat-workgroup.ll | 552 ++++++++++---------- .../AMDGPU/memory-legalizer-global-agent.ll | 568 ++++++++++---------- .../AMDGPU/memory-legalizer-global-nontemporal.ll | 28 +- .../AMDGPU/memory-legalizer-global-singlethread.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-global-system.ll | 544 +++++++++---------- .../AMDGPU/memory-legalizer-global-volatile.ll | 38 +- .../AMDGPU/memory-legalizer-global-wavefront.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-global-workgroup.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-invalid-syncscope.ll | 28 +- .../CodeGen/AMDGPU/memory-legalizer-local-agent.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-local-nontemporal.ll | 28 +- .../AMDGPU/memory-legalizer-local-singlethread.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-local-system.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-local-volatile.ll | 38 +- .../AMDGPU/memory-legalizer-local-wavefront.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-local-workgroup.ll | 576 ++++++++++----------- .../AMDGPU/memory-legalizer-private-nontemporal.ll | 28 +- .../AMDGPU/memory-legalizer-private-volatile.ll | 28 +- .../AMDGPU/memory-legalizer-store-infinite-loop.ll | 10 +- 25 files changed, 4430 insertions(+), 4430 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index 0863da9..b9cc688 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_agent_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -273,10 +273,10 @@ define amdgpu_kernel void @flat_agent_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -421,10 +421,10 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -583,10 +583,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -700,9 +700,9 @@ define amdgpu_kernel void @flat_agent_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("agent") unordered, align 4 ret void } @@ -816,9 +816,9 @@ define amdgpu_kernel void @flat_agent_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("agent") monotonic, align 4 ret void } @@ -948,9 +948,9 @@ define amdgpu_kernel void @flat_agent_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent") release, align 4 + store atomic i32 %in, ptr %out syncscope("agent") release, align 4 ret void } @@ -1080,9 +1080,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("agent") seq_cst, align 4 ret void } @@ -1196,9 +1196,9 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") monotonic ret void } @@ -1337,9 +1337,9 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire ret void } @@ -1469,9 +1469,9 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") release ret void } @@ -1626,9 +1626,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel ret void } @@ -1783,9 +1783,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst ret void } @@ -1932,10 +1932,10 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -2098,10 +2098,10 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -2264,10 +2264,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -2379,10 +2379,10 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic ret void } @@ -2519,10 +2519,10 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic ret void } @@ -2650,10 +2650,10 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic ret void } @@ -2806,10 +2806,10 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic ret void } @@ -2962,10 +2962,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic ret void } @@ -3102,10 +3102,10 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire ret void } @@ -3242,10 +3242,10 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire ret void } @@ -3398,10 +3398,10 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release acquire ret void } @@ -3554,10 +3554,10 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire ret void } @@ -3710,10 +3710,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire ret void } @@ -3866,10 +3866,10 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst ret void } @@ -4022,10 +4022,10 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst ret void } @@ -4178,10 +4178,10 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst ret void } @@ -4334,10 +4334,10 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst ret void } @@ -4490,10 +4490,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -4633,12 +4633,12 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4791,12 +4791,12 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4952,12 +4952,12 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5126,12 +5126,12 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5300,12 +5300,12 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5458,12 +5458,12 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5616,12 +5616,12 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5790,12 +5790,12 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5964,12 +5964,12 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6138,12 +6138,12 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6312,12 +6312,12 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6486,12 +6486,12 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6660,12 +6660,12 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6834,12 +6834,12 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -7008,12 +7008,12 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -7145,10 +7145,10 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent-one-as") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent-one-as") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -7280,10 +7280,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent-one-as") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent-one-as") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -7436,10 +7436,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent-one-as") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent-one-as") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -7606,10 +7606,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("agent-one-as") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("agent-one-as") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -7723,9 +7723,9 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("agent-one-as") unordered, align 4 ret void } @@ -7839,9 +7839,9 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("agent-one-as") monotonic, align 4 ret void } @@ -7971,9 +7971,9 @@ define amdgpu_kernel void @flat_agent_one_as_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4 + store atomic i32 %in, ptr %out syncscope("agent-one-as") release, align 4 ret void } @@ -8103,9 +8103,9 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("agent-one-as") seq_cst, align 4 ret void } @@ -8219,9 +8219,9 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") monotonic ret void } @@ -8356,9 +8356,9 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire ret void } @@ -8488,9 +8488,9 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") release ret void } @@ -8641,9 +8641,9 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel ret void } @@ -8794,9 +8794,9 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst ret void } @@ -8950,10 +8950,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -9123,10 +9123,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -9296,10 +9296,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -9411,10 +9411,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic ret void } @@ -9547,10 +9547,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic ret void } @@ -9678,10 +9678,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic ret void } @@ -9830,10 +9830,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic ret void } @@ -9982,10 +9982,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic ret void } @@ -10118,10 +10118,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire ret void } @@ -10254,10 +10254,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire ret void } @@ -10406,10 +10406,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire ret void } @@ -10558,10 +10558,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire ret void } @@ -10710,10 +10710,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire ret void } @@ -10862,10 +10862,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst ret void } @@ -11014,10 +11014,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst ret void } @@ -11166,10 +11166,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst ret void } @@ -11318,10 +11318,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst ret void } @@ -11470,10 +11470,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst ret void } @@ -11613,12 +11613,12 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11779,12 +11779,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11940,12 +11940,12 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12122,12 +12122,12 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12304,12 +12304,12 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12470,12 +12470,12 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12636,12 +12636,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12818,12 +12818,12 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13000,12 +13000,12 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13182,12 +13182,12 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13364,12 +13364,12 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13546,12 +13546,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13728,12 +13728,12 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13910,12 +13910,12 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -14092,12 +14092,12 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll index 30d7e56..8a60992 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load i32, i32* %in, align 4, !nontemporal !0 - store i32 %val, i32* %out + %val = load i32, ptr %in, align 4, !nontemporal !0 + store i32 %val, ptr %out ret void } @@ -291,12 +291,12 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid - %val = load i32, i32* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32* %out + %val.gep = getelementptr inbounds i32, ptr %in, i32 %tid + %val = load i32, ptr %val.gep, align 4, !nontemporal !0 + store i32 %val, ptr %out ret void } @@ -428,10 +428,10 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load i32, i32* %in, align 4 - store i32 %val, i32* %out, !nontemporal !0 + %val = load i32, ptr %in, align 4 + store i32 %val, ptr %out, !nontemporal !0 ret void } @@ -581,12 +581,12 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32* %in, align 4 - %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid - store i32 %val, i32* %out.gep, !nontemporal !0 + %val = load i32, ptr %in, align 4 + %out.gep = getelementptr inbounds i32, ptr %out, i32 %tid + store i32 %val, ptr %out.gep, !nontemporal !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll index b879e21..3009726 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_singlethread_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -273,10 +273,10 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -408,10 +408,10 @@ define amdgpu_kernel void @flat_singlethread_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -543,10 +543,10 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -660,9 +660,9 @@ define amdgpu_kernel void @flat_singlethread_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread") unordered, align 4 ret void } @@ -776,9 +776,9 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread") monotonic, align 4 ret void } @@ -892,9 +892,9 @@ define amdgpu_kernel void @flat_singlethread_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread") release, align 4 ret void } @@ -1008,9 +1008,9 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread") seq_cst, align 4 ret void } @@ -1124,9 +1124,9 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") monotonic ret void } @@ -1240,9 +1240,9 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire ret void } @@ -1356,9 +1356,9 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") release ret void } @@ -1472,9 +1472,9 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel ret void } @@ -1588,9 +1588,9 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst ret void } @@ -1724,10 +1724,10 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -1861,10 +1861,10 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -1998,10 +1998,10 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -2113,10 +2113,10 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic ret void } @@ -2228,10 +2228,10 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic ret void } @@ -2343,10 +2343,10 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic ret void } @@ -2458,10 +2458,10 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic ret void } @@ -2573,10 +2573,10 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic ret void } @@ -2688,10 +2688,10 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire ret void } @@ -2803,10 +2803,10 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire ret void } @@ -2918,10 +2918,10 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire ret void } @@ -3033,10 +3033,10 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire ret void } @@ -3148,10 +3148,10 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire ret void } @@ -3263,10 +3263,10 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst ret void } @@ -3378,10 +3378,10 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst ret void } @@ -3493,10 +3493,10 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst ret void } @@ -3608,10 +3608,10 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst ret void } @@ -3723,10 +3723,10 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst ret void } @@ -3866,12 +3866,12 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4011,12 +4011,12 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4156,12 +4156,12 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4301,12 +4301,12 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4446,12 +4446,12 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4591,12 +4591,12 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4736,12 +4736,12 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4881,12 +4881,12 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5026,12 +5026,12 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5171,12 +5171,12 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5316,12 +5316,12 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5461,12 +5461,12 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5606,12 +5606,12 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5751,12 +5751,12 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5896,12 +5896,12 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6033,10 +6033,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread-one-as") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread-one-as") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -6168,10 +6168,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread-one-as") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread-one-as") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -6303,10 +6303,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread-one-as") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread-one-as") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -6438,10 +6438,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("singlethread-one-as") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("singlethread-one-as") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -6555,9 +6555,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread-one-as") unordered, align 4 ret void } @@ -6671,9 +6671,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread-one-as") monotonic, align 4 ret void } @@ -6787,9 +6787,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread-one-as") release, align 4 ret void } @@ -6903,9 +6903,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("singlethread-one-as") seq_cst, align 4 ret void } @@ -7019,9 +7019,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") monotonic ret void } @@ -7135,9 +7135,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire ret void } @@ -7251,9 +7251,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") release ret void } @@ -7367,9 +7367,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel ret void } @@ -7483,9 +7483,9 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst ret void } @@ -7619,10 +7619,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -7756,10 +7756,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -7893,10 +7893,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -8008,10 +8008,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic ret void } @@ -8123,10 +8123,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic ret void } @@ -8238,10 +8238,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic ret void } @@ -8353,10 +8353,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic ret void } @@ -8468,10 +8468,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic ret void } @@ -8583,10 +8583,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire ret void } @@ -8698,10 +8698,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire ret void } @@ -8813,10 +8813,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire ret void } @@ -8928,10 +8928,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire ret void } @@ -9043,10 +9043,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire ret void } @@ -9158,10 +9158,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst ret void } @@ -9273,10 +9273,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst ret void } @@ -9388,10 +9388,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst ret void } @@ -9503,10 +9503,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst ret void } @@ -9618,10 +9618,10 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst ret void } @@ -9761,12 +9761,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -9906,12 +9906,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10051,12 +10051,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10196,12 +10196,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10341,12 +10341,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10486,12 +10486,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10631,12 +10631,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10776,12 +10776,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10921,12 +10921,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11066,12 +11066,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11211,12 +11211,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11356,12 +11356,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11501,12 +11501,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11646,12 +11646,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11791,12 +11791,12 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 877a7b1..5a38311 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_system_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in unordered, align 4 + store i32 %val, ptr %out ret void } @@ -273,10 +273,10 @@ define amdgpu_kernel void @flat_system_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -423,10 +423,10 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in acquire, align 4 + store i32 %val, ptr %out ret void } @@ -587,10 +587,10 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -704,9 +704,9 @@ define amdgpu_kernel void @flat_system_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out unordered, align 4 + store atomic i32 %in, ptr %out unordered, align 4 ret void } @@ -820,9 +820,9 @@ define amdgpu_kernel void @flat_system_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out monotonic, align 4 + store atomic i32 %in, ptr %out monotonic, align 4 ret void } @@ -954,9 +954,9 @@ define amdgpu_kernel void @flat_system_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out release, align 4 + store atomic i32 %in, ptr %out release, align 4 ret void } @@ -1088,9 +1088,9 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out seq_cst, align 4 + store atomic i32 %in, ptr %out seq_cst, align 4 ret void } @@ -1204,9 +1204,9 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in monotonic ret void } @@ -1347,9 +1347,9 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in acquire ret void } @@ -1481,9 +1481,9 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in release + %val = atomicrmw volatile xchg ptr %out, i32 %in release ret void } @@ -1642,9 +1642,9 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel ret void } @@ -1803,9 +1803,9 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst ret void } @@ -1954,10 +1954,10 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in acquire + store i32 %val, ptr %out, align 4 ret void } @@ -2124,10 +2124,10 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -2294,10 +2294,10 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -2409,10 +2409,10 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic ret void } @@ -2551,10 +2551,10 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic ret void } @@ -2684,10 +2684,10 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic ret void } @@ -2844,10 +2844,10 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic ret void } @@ -3004,10 +3004,10 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic ret void } @@ -3146,10 +3146,10 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire ret void } @@ -3288,10 +3288,10 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire ret void } @@ -3448,10 +3448,10 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire ret void } @@ -3608,10 +3608,10 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire ret void } @@ -3768,10 +3768,10 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire ret void } @@ -3928,10 +3928,10 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst ret void } @@ -4088,10 +4088,10 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst ret void } @@ -4248,10 +4248,10 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst ret void } @@ -4408,10 +4408,10 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst ret void } @@ -4568,10 +4568,10 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -4711,12 +4711,12 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4871,12 +4871,12 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5034,12 +5034,12 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5212,12 +5212,12 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5390,12 +5390,12 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5550,12 +5550,12 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5710,12 +5710,12 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5888,12 +5888,12 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6066,12 +6066,12 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6244,12 +6244,12 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6422,12 +6422,12 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6600,12 +6600,12 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6778,12 +6778,12 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6956,12 +6956,12 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -7134,12 +7134,12 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -7271,10 +7271,10 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("one-as") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("one-as") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -7406,10 +7406,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("one-as") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("one-as") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -7564,10 +7564,10 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("one-as") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("one-as") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -7736,10 +7736,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("one-as") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("one-as") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -7853,9 +7853,9 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("one-as") unordered, align 4 ret void } @@ -7969,9 +7969,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("one-as") monotonic, align 4 ret void } @@ -8103,9 +8103,9 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("one-as") release, align 4 + store atomic i32 %in, ptr %out syncscope("one-as") release, align 4 ret void } @@ -8237,9 +8237,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("one-as") seq_cst, align 4 ret void } @@ -8353,9 +8353,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") monotonic ret void } @@ -8492,9 +8492,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire ret void } @@ -8626,9 +8626,9 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") release ret void } @@ -8783,9 +8783,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel ret void } @@ -8940,9 +8940,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst ret void } @@ -9098,10 +9098,10 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -9275,10 +9275,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -9452,10 +9452,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -9567,10 +9567,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic ret void } @@ -9705,10 +9705,10 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic ret void } @@ -9838,10 +9838,10 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic ret void } @@ -9994,10 +9994,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic ret void } @@ -10150,10 +10150,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic ret void } @@ -10288,10 +10288,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire ret void } @@ -10426,10 +10426,10 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire ret void } @@ -10582,10 +10582,10 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire ret void } @@ -10738,10 +10738,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire ret void } @@ -10894,10 +10894,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire ret void } @@ -11050,10 +11050,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst ret void } @@ -11206,10 +11206,10 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst ret void } @@ -11362,10 +11362,10 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst ret void } @@ -11518,10 +11518,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst ret void } @@ -11674,10 +11674,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst ret void } @@ -11817,12 +11817,12 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11985,12 +11985,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12148,12 +12148,12 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12334,12 +12334,12 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12520,12 +12520,12 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12688,12 +12688,12 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12856,12 +12856,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13042,12 +13042,12 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13228,12 +13228,12 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13414,12 +13414,12 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13600,12 +13600,12 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13786,12 +13786,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -13972,12 +13972,12 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -14158,12 +14158,12 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -14344,12 +14344,12 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll index 7b46e26..d8879dd 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -88,10 +88,10 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load volatile i32, i32* %in, align 4 - store i32 %val, i32* %out + %val = load volatile i32, ptr %in, align 4 + store i32 %val, ptr %out ret void } @@ -189,12 +189,12 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid - %val = load volatile i32, i32* %val.gep, align 4 - store i32 %val, i32* %out + %val.gep = getelementptr inbounds i32, ptr %in, i32 %tid + %val = load volatile i32, ptr %val.gep, align 4 + store i32 %val, ptr %out ret void } @@ -280,10 +280,10 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load i32, i32* %in, align 4 - store volatile i32 %val, i32* %out + %val = load i32, ptr %in, align 4 + store volatile i32 %val, ptr %out ret void } @@ -381,12 +381,12 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32* %in, align 4 - %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid - store volatile i32 %val, i32* %out.gep + %val = load i32, ptr %in, align 4 + %out.gep = getelementptr inbounds i32, ptr %out, i32 %tid + store volatile i32 %val, ptr %out.gep ret void } @@ -472,10 +472,10 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic volatile i32, i32* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic volatile i32, ptr %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -557,9 +557,9 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic volatile i32 %in, i32* %out syncscope("workgroup") release, align 4 + store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll index 2c9c612..d92364f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_wavefront_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -273,10 +273,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -408,10 +408,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -543,10 +543,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -660,9 +660,9 @@ define amdgpu_kernel void @flat_wavefront_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront") unordered, align 4 ret void } @@ -776,9 +776,9 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront") monotonic, align 4 ret void } @@ -892,9 +892,9 @@ define amdgpu_kernel void @flat_wavefront_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront") release, align 4 ret void } @@ -1008,9 +1008,9 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront") seq_cst, align 4 ret void } @@ -1124,9 +1124,9 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") monotonic ret void } @@ -1240,9 +1240,9 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire ret void } @@ -1356,9 +1356,9 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") release ret void } @@ -1472,9 +1472,9 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel ret void } @@ -1588,9 +1588,9 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst ret void } @@ -1724,10 +1724,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -1861,10 +1861,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -1998,10 +1998,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -2113,10 +2113,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic ret void } @@ -2228,10 +2228,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic ret void } @@ -2343,10 +2343,10 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic ret void } @@ -2458,10 +2458,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic ret void } @@ -2573,10 +2573,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic ret void } @@ -2688,10 +2688,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire ret void } @@ -2803,10 +2803,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire ret void } @@ -2918,10 +2918,10 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire ret void } @@ -3033,10 +3033,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire ret void } @@ -3148,10 +3148,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire ret void } @@ -3263,10 +3263,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst ret void } @@ -3378,10 +3378,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst ret void } @@ -3493,10 +3493,10 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst ret void } @@ -3608,10 +3608,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst ret void } @@ -3723,10 +3723,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst ret void } @@ -3866,12 +3866,12 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4011,12 +4011,12 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4156,12 +4156,12 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4301,12 +4301,12 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4446,12 +4446,12 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4591,12 +4591,12 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4736,12 +4736,12 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4881,12 +4881,12 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5026,12 +5026,12 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5171,12 +5171,12 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5316,12 +5316,12 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5461,12 +5461,12 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5606,12 +5606,12 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5751,12 +5751,12 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5896,12 +5896,12 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6033,10 +6033,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront-one-as") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront-one-as") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -6168,10 +6168,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront-one-as") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront-one-as") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -6303,10 +6303,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront-one-as") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront-one-as") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -6438,10 +6438,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("wavefront-one-as") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("wavefront-one-as") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -6555,9 +6555,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront-one-as") unordered, align 4 ret void } @@ -6671,9 +6671,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront-one-as") monotonic, align 4 ret void } @@ -6787,9 +6787,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront-one-as") release, align 4 ret void } @@ -6903,9 +6903,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("wavefront-one-as") seq_cst, align 4 ret void } @@ -7019,9 +7019,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") monotonic ret void } @@ -7135,9 +7135,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire ret void } @@ -7251,9 +7251,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") release ret void } @@ -7367,9 +7367,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel ret void } @@ -7483,9 +7483,9 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst ret void } @@ -7619,10 +7619,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -7756,10 +7756,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -7893,10 +7893,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -8008,10 +8008,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic ret void } @@ -8123,10 +8123,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic ret void } @@ -8238,10 +8238,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic ret void } @@ -8353,10 +8353,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic ret void } @@ -8468,10 +8468,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic ret void } @@ -8583,10 +8583,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire ret void } @@ -8698,10 +8698,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire ret void } @@ -8813,10 +8813,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire ret void } @@ -8928,10 +8928,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire ret void } @@ -9043,10 +9043,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire ret void } @@ -9158,10 +9158,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst ret void } @@ -9273,10 +9273,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst ret void } @@ -9388,10 +9388,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst ret void } @@ -9503,10 +9503,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst ret void } @@ -9618,10 +9618,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst ret void } @@ -9761,12 +9761,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -9906,12 +9906,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10051,12 +10051,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10196,12 +10196,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10341,12 +10341,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10486,12 +10486,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10631,12 +10631,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10776,12 +10776,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10921,12 +10921,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11066,12 +11066,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11211,12 +11211,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11356,12 +11356,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11501,12 +11501,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11646,12 +11646,12 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index d9c3699..1669d30 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -138,10 +138,10 @@ define amdgpu_kernel void @flat_workgroup_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -273,10 +273,10 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -418,10 +418,10 @@ define amdgpu_kernel void @flat_workgroup_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -575,10 +575,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -692,9 +692,9 @@ define amdgpu_kernel void @flat_workgroup_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup") unordered, align 4 ret void } @@ -808,9 +808,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup") monotonic, align 4 ret void } @@ -936,9 +936,9 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup") release, align 4 ret void } @@ -1064,9 +1064,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup") seq_cst, align 4 ret void } @@ -1180,9 +1180,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") monotonic ret void } @@ -1311,9 +1311,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire ret void } @@ -1439,9 +1439,9 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") release ret void } @@ -1582,9 +1582,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel ret void } @@ -1725,9 +1725,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst ret void } @@ -1865,10 +1865,10 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -2018,10 +2018,10 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -2171,10 +2171,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -2286,10 +2286,10 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic ret void } @@ -2416,10 +2416,10 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic ret void } @@ -2543,10 +2543,10 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic ret void } @@ -2685,10 +2685,10 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic ret void } @@ -2827,10 +2827,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic ret void } @@ -2957,10 +2957,10 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire ret void } @@ -3087,10 +3087,10 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire ret void } @@ -3229,10 +3229,10 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire ret void } @@ -3371,10 +3371,10 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire ret void } @@ -3513,10 +3513,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire ret void } @@ -3655,10 +3655,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst ret void } @@ -3798,12 +3798,12 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -3950,12 +3950,12 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4107,12 +4107,12 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4271,12 +4271,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4435,12 +4435,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4587,12 +4587,12 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4739,12 +4739,12 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -4903,12 +4903,12 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5067,12 +5067,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5231,12 +5231,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5395,12 +5395,12 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5559,12 +5559,12 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5723,12 +5723,12 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -5887,12 +5887,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6051,12 +6051,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -6188,10 +6188,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup-one-as") unordered, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup-one-as") unordered, align 4 + store i32 %val, ptr %out ret void } @@ -6323,10 +6323,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup-one-as") monotonic, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup-one-as") monotonic, align 4 + store i32 %val, ptr %out ret void } @@ -6464,10 +6464,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup-one-as") acquire, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup-one-as") acquire, align 4 + store i32 %val, ptr %out ret void } @@ -6611,10 +6611,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("workgroup-one-as") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("workgroup-one-as") seq_cst, align 4 + store i32 %val, ptr %out ret void } @@ -6728,9 +6728,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup-one-as") unordered, align 4 ret void } @@ -6844,9 +6844,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup-one-as") monotonic, align 4 ret void } @@ -6966,9 +6966,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup-one-as") release, align 4 ret void } @@ -7088,9 +7088,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("workgroup-one-as") seq_cst, align 4 ret void } @@ -7204,9 +7204,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") monotonic ret void } @@ -7327,9 +7327,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire ret void } @@ -7449,9 +7449,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") release ret void } @@ -7578,9 +7578,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel ret void } @@ -7707,9 +7707,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst ret void } @@ -7849,10 +7849,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire + store i32 %val, ptr %out, align 4 ret void } @@ -7998,10 +7998,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel + store i32 %val, ptr %out, align 4 ret void } @@ -8147,10 +8147,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst - store i32 %val, i32* %out, align 4 + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst + store i32 %val, ptr %out, align 4 ret void } @@ -8262,10 +8262,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic ret void } @@ -8384,10 +8384,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic ret void } @@ -8505,10 +8505,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic ret void } @@ -8633,10 +8633,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic ret void } @@ -8761,10 +8761,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic ret void } @@ -8883,10 +8883,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire ret void } @@ -9005,10 +9005,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire ret void } @@ -9133,10 +9133,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire ret void } @@ -9261,10 +9261,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire ret void } @@ -9389,10 +9389,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire ret void } @@ -9517,10 +9517,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst ret void } @@ -9645,10 +9645,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst ret void } @@ -9773,10 +9773,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst ret void } @@ -9901,10 +9901,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst ret void } @@ -10029,10 +10029,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst ret void } @@ -10172,12 +10172,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10323,12 +10323,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10474,12 +10474,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10631,12 +10631,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10788,12 +10788,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -10939,12 +10939,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11090,12 +11090,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11247,12 +11247,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11404,12 +11404,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11561,12 +11561,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11718,12 +11718,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -11875,12 +11875,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12032,12 +12032,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12189,12 +12189,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } @@ -12346,12 +12346,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32* %out, align 4 + store i32 %val0, ptr %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index 2f38c84..da5f05c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -136,10 +136,10 @@ define amdgpu_kernel void @global_agent_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -268,10 +268,10 @@ define amdgpu_kernel void @global_agent_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -414,10 +414,10 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -567,10 +567,10 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -691,9 +691,9 @@ define amdgpu_kernel void @global_agent_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") unordered, align 4 ret void } @@ -814,9 +814,9 @@ define amdgpu_kernel void @global_agent_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4 ret void } @@ -954,9 +954,9 @@ define amdgpu_kernel void @global_agent_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") release, align 4 ret void } @@ -1094,9 +1094,9 @@ define amdgpu_kernel void @global_agent_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4 ret void } @@ -1217,9 +1217,9 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") monotonic ret void } @@ -1363,9 +1363,9 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire ret void } @@ -1503,9 +1503,9 @@ define amdgpu_kernel void @global_agent_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") release ret void } @@ -1666,9 +1666,9 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel ret void } @@ -1829,9 +1829,9 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst ret void } @@ -1988,10 +1988,10 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2165,10 +2165,10 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2342,10 +2342,10 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2464,10 +2464,10 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic ret void } @@ -2609,10 +2609,10 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic ret void } @@ -2748,10 +2748,10 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic ret void } @@ -2910,10 +2910,10 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic ret void } @@ -3072,10 +3072,10 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic ret void } @@ -3217,10 +3217,10 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire ret void } @@ -3362,10 +3362,10 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire ret void } @@ -3524,10 +3524,10 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire ret void } @@ -3686,10 +3686,10 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire ret void } @@ -3848,10 +3848,10 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire ret void } @@ -4010,10 +4010,10 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst ret void } @@ -4172,10 +4172,10 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst ret void } @@ -4334,10 +4334,10 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst ret void } @@ -4496,10 +4496,10 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst ret void } @@ -4658,10 +4658,10 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -4804,12 +4804,12 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4966,12 +4966,12 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5131,12 +5131,12 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5310,12 +5310,12 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5489,12 +5489,12 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5651,12 +5651,12 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5813,12 +5813,12 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5992,12 +5992,12 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6171,12 +6171,12 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6350,12 +6350,12 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6529,12 +6529,12 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6708,12 +6708,12 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6887,12 +6887,12 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -7066,12 +7066,12 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -7245,12 +7245,12 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -7379,10 +7379,10 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7511,10 +7511,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7657,10 +7657,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7810,10 +7810,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7934,9 +7934,9 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") unordered, align 4 ret void } @@ -8057,9 +8057,9 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") monotonic, align 4 ret void } @@ -8197,9 +8197,9 @@ define amdgpu_kernel void @global_agent_one_as_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") release, align 4 ret void } @@ -8337,9 +8337,9 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") seq_cst, align 4 ret void } @@ -8460,9 +8460,9 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") monotonic ret void } @@ -8606,9 +8606,9 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire ret void } @@ -8746,9 +8746,9 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") release ret void } @@ -8909,9 +8909,9 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel ret void } @@ -9072,9 +9072,9 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst ret void } @@ -9231,10 +9231,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -9408,10 +9408,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -9585,10 +9585,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -9707,10 +9707,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic ret void } @@ -9852,10 +9852,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic ret void } @@ -9991,10 +9991,10 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic ret void } @@ -10153,10 +10153,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic ret void } @@ -10315,10 +10315,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic ret void } @@ -10460,10 +10460,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire ret void } @@ -10605,10 +10605,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire ret void } @@ -10767,10 +10767,10 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire ret void } @@ -10929,10 +10929,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire ret void } @@ -11091,10 +11091,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire ret void } @@ -11253,10 +11253,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst ret void } @@ -11415,10 +11415,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst ret void } @@ -11577,10 +11577,10 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst ret void } @@ -11739,10 +11739,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst ret void } @@ -11901,10 +11901,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst ret void } @@ -12047,12 +12047,12 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12209,12 +12209,12 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12388,12 +12388,12 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12567,12 +12567,12 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12729,12 +12729,12 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12891,12 +12891,12 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13070,12 +13070,12 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13249,12 +13249,12 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13428,12 +13428,12 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13607,12 +13607,12 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13786,12 +13786,12 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13965,12 +13965,12 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -14144,12 +14144,12 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -14323,12 +14323,12 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll index ad9f7d9..dadec88 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -139,10 +139,10 @@ define amdgpu_kernel void @global_nontemporal_load_0( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(1) %in, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -283,12 +283,12 @@ define amdgpu_kernel void @global_nontemporal_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %val.gep, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -420,10 +420,10 @@ define amdgpu_kernel void @global_nontemporal_store_0( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + store i32 %val, ptr addrspace(1) %out, !nontemporal !0 ret void } @@ -559,12 +559,12 @@ define amdgpu_kernel void @global_nontemporal_store_1( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid - store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid + store i32 %val, ptr addrspace(1) %out.gep, !nontemporal !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll index cceedf9..61b13d0 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll @@ -136,10 +136,10 @@ define amdgpu_kernel void @global_singlethread_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -268,10 +268,10 @@ define amdgpu_kernel void @global_singlethread_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -400,10 +400,10 @@ define amdgpu_kernel void @global_singlethread_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -532,10 +532,10 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -656,9 +656,9 @@ define amdgpu_kernel void @global_singlethread_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") unordered, align 4 ret void } @@ -779,9 +779,9 @@ define amdgpu_kernel void @global_singlethread_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") monotonic, align 4 ret void } @@ -902,9 +902,9 @@ define amdgpu_kernel void @global_singlethread_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") release, align 4 ret void } @@ -1025,9 +1025,9 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") seq_cst, align 4 ret void } @@ -1148,9 +1148,9 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") monotonic ret void } @@ -1271,9 +1271,9 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire ret void } @@ -1394,9 +1394,9 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") release ret void } @@ -1517,9 +1517,9 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel ret void } @@ -1640,9 +1640,9 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst ret void } @@ -1785,10 +1785,10 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -1931,10 +1931,10 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2077,10 +2077,10 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2199,10 +2199,10 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic ret void } @@ -2321,10 +2321,10 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic ret void } @@ -2443,10 +2443,10 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic ret void } @@ -2565,10 +2565,10 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic ret void } @@ -2687,10 +2687,10 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic ret void } @@ -2809,10 +2809,10 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire ret void } @@ -2931,10 +2931,10 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire ret void } @@ -3053,10 +3053,10 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release acquire ret void } @@ -3175,10 +3175,10 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire ret void } @@ -3297,10 +3297,10 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire ret void } @@ -3419,10 +3419,10 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst ret void } @@ -3541,10 +3541,10 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst ret void } @@ -3663,10 +3663,10 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst ret void } @@ -3785,10 +3785,10 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst ret void } @@ -3907,10 +3907,10 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst ret void } @@ -4053,12 +4053,12 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4201,12 +4201,12 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4349,12 +4349,12 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4497,12 +4497,12 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4645,12 +4645,12 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4793,12 +4793,12 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4941,12 +4941,12 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5089,12 +5089,12 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5237,12 +5237,12 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5385,12 +5385,12 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5533,12 +5533,12 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5681,12 +5681,12 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5829,12 +5829,12 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5977,12 +5977,12 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6125,12 +6125,12 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6259,10 +6259,10 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread-one-as") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6391,10 +6391,10 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6523,10 +6523,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread-one-as") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6655,10 +6655,10 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("singlethread-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6779,9 +6779,9 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") unordered, align 4 ret void } @@ -6902,9 +6902,9 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") monotonic, align 4 ret void } @@ -7025,9 +7025,9 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") release, align 4 ret void } @@ -7148,9 +7148,9 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("singlethread-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") seq_cst, align 4 ret void } @@ -7271,9 +7271,9 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") monotonic ret void } @@ -7394,9 +7394,9 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire ret void } @@ -7517,9 +7517,9 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") release ret void } @@ -7640,9 +7640,9 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel ret void } @@ -7763,9 +7763,9 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst ret void } @@ -7908,10 +7908,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8054,10 +8054,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8200,10 +8200,10 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("singlethread-one-as") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8322,10 +8322,10 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic ret void } @@ -8444,10 +8444,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic ret void } @@ -8566,10 +8566,10 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic ret void } @@ -8688,10 +8688,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic ret void } @@ -8810,10 +8810,10 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic ret void } @@ -8932,10 +8932,10 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire ret void } @@ -9054,10 +9054,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire ret void } @@ -9176,10 +9176,10 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire ret void } @@ -9298,10 +9298,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire ret void } @@ -9420,10 +9420,10 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire ret void } @@ -9542,10 +9542,10 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst ret void } @@ -9664,10 +9664,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst ret void } @@ -9786,10 +9786,10 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst ret void } @@ -9908,10 +9908,10 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst ret void } @@ -10030,10 +10030,10 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst ret void } @@ -10176,12 +10176,12 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10324,12 +10324,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10472,12 +10472,12 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10620,12 +10620,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10768,12 +10768,12 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10916,12 +10916,12 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11064,12 +11064,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11212,12 +11212,12 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11360,12 +11360,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11508,12 +11508,12 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11656,12 +11656,12 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11804,12 +11804,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11952,12 +11952,12 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12100,12 +12100,12 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12248,12 +12248,12 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index 06ff398..cd50b1e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -136,10 +136,10 @@ define amdgpu_kernel void @global_system_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -268,10 +268,10 @@ define amdgpu_kernel void @global_system_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -416,10 +416,10 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -571,10 +571,10 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -695,9 +695,9 @@ define amdgpu_kernel void @global_system_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out unordered, align 4 ret void } @@ -818,9 +818,9 @@ define amdgpu_kernel void @global_system_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out monotonic, align 4 ret void } @@ -960,9 +960,9 @@ define amdgpu_kernel void @global_system_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out release, align 4 + store atomic i32 %in, ptr addrspace(1) %out release, align 4 ret void } @@ -1102,9 +1102,9 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4 ret void } @@ -1225,9 +1225,9 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in monotonic ret void } @@ -1373,9 +1373,9 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire ret void } @@ -1515,9 +1515,9 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in release ret void } @@ -1682,9 +1682,9 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel ret void } @@ -1849,9 +1849,9 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst ret void } @@ -2010,10 +2010,10 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2191,10 +2191,10 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2372,10 +2372,10 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2494,10 +2494,10 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic ret void } @@ -2641,10 +2641,10 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic ret void } @@ -2782,10 +2782,10 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release monotonic ret void } @@ -2948,10 +2948,10 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic ret void } @@ -3114,10 +3114,10 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic ret void } @@ -3261,10 +3261,10 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire ret void } @@ -3408,10 +3408,10 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire ret void } @@ -3574,10 +3574,10 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire ret void } @@ -3740,10 +3740,10 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire ret void } @@ -3906,10 +3906,10 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire ret void } @@ -4072,10 +4072,10 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -4218,12 +4218,12 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4382,12 +4382,12 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4565,12 +4565,12 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4748,12 +4748,12 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4912,12 +4912,12 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5076,12 +5076,12 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5259,12 +5259,12 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5442,12 +5442,12 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5625,12 +5625,12 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5808,12 +5808,12 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5991,12 +5991,12 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6174,12 +6174,12 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6357,12 +6357,12 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6540,12 +6540,12 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6674,10 +6674,10 @@ define amdgpu_kernel void @global_system_one_as_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6806,10 +6806,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6954,10 +6954,10 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7109,10 +7109,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7233,9 +7233,9 @@ define amdgpu_kernel void @global_system_one_as_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") unordered, align 4 ret void } @@ -7356,9 +7356,9 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") monotonic, align 4 ret void } @@ -7498,9 +7498,9 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") release, align 4 ret void } @@ -7640,9 +7640,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") seq_cst, align 4 ret void } @@ -7763,9 +7763,9 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") monotonic ret void } @@ -7911,9 +7911,9 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire ret void } @@ -8053,9 +8053,9 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") release ret void } @@ -8220,9 +8220,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel ret void } @@ -8387,9 +8387,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst ret void } @@ -8548,10 +8548,10 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8729,10 +8729,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8910,10 +8910,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -9032,10 +9032,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic ret void } @@ -9179,10 +9179,10 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic ret void } @@ -9320,10 +9320,10 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic ret void } @@ -9486,10 +9486,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic ret void } @@ -9652,10 +9652,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic ret void } @@ -9799,10 +9799,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire ret void } @@ -9946,10 +9946,10 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire ret void } @@ -10112,10 +10112,10 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire ret void } @@ -10278,10 +10278,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire ret void } @@ -10444,10 +10444,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire ret void } @@ -10610,10 +10610,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst ret void } @@ -10776,10 +10776,10 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst ret void } @@ -10942,10 +10942,10 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst ret void } @@ -11108,10 +11108,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst ret void } @@ -11274,10 +11274,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst ret void } @@ -11420,12 +11420,12 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11584,12 +11584,12 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11751,12 +11751,12 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11934,12 +11934,12 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12117,12 +12117,12 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12281,12 +12281,12 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12445,12 +12445,12 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12628,12 +12628,12 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12811,12 +12811,12 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12994,12 +12994,12 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13177,12 +13177,12 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13360,12 +13360,12 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13543,12 +13543,12 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13726,12 +13726,12 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13909,12 +13909,12 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll index f552c0a..c8aa089 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll @@ -92,10 +92,10 @@ define amdgpu_kernel void @global_volatile_load_0( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load volatile i32, i32 addrspace(1)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load volatile i32, ptr addrspace(1) %in, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -192,12 +192,12 @@ define amdgpu_kernel void @global_volatile_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %val = load volatile i32, i32 addrspace(1)* %val.gep, align 4 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %val = load volatile i32, ptr addrspace(1) %val.gep, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -292,10 +292,10 @@ define amdgpu_kernel void @global_volatile_store_0( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store volatile i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(1) %in, align 4 + store volatile i32 %val, ptr addrspace(1) %out ret void } @@ -394,12 +394,12 @@ define amdgpu_kernel void @global_volatile_store_1( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid - store volatile i32 %val, i32 addrspace(1)* %out.gep + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid + store volatile i32 %val, ptr addrspace(1) %out.gep ret void } @@ -490,10 +490,10 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic volatile i32, i32 addrspace(1)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -583,9 +583,9 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic volatile i32 %in, i32 addrspace(1)* %out syncscope("workgroup") release, align 4 + store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll index e518cb5..57752ba 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll @@ -136,10 +136,10 @@ define amdgpu_kernel void @global_wavefront_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -268,10 +268,10 @@ define amdgpu_kernel void @global_wavefront_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -400,10 +400,10 @@ define amdgpu_kernel void @global_wavefront_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -532,10 +532,10 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -656,9 +656,9 @@ define amdgpu_kernel void @global_wavefront_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") unordered, align 4 ret void } @@ -779,9 +779,9 @@ define amdgpu_kernel void @global_wavefront_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4 ret void } @@ -902,9 +902,9 @@ define amdgpu_kernel void @global_wavefront_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") release, align 4 ret void } @@ -1025,9 +1025,9 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") seq_cst, align 4 ret void } @@ -1148,9 +1148,9 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") monotonic ret void } @@ -1271,9 +1271,9 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire ret void } @@ -1394,9 +1394,9 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") release ret void } @@ -1517,9 +1517,9 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel ret void } @@ -1640,9 +1640,9 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst ret void } @@ -1785,10 +1785,10 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -1931,10 +1931,10 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2077,10 +2077,10 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2199,10 +2199,10 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic ret void } @@ -2321,10 +2321,10 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic ret void } @@ -2443,10 +2443,10 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic ret void } @@ -2565,10 +2565,10 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic ret void } @@ -2687,10 +2687,10 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic ret void } @@ -2809,10 +2809,10 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire ret void } @@ -2931,10 +2931,10 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire ret void } @@ -3053,10 +3053,10 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire ret void } @@ -3175,10 +3175,10 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire ret void } @@ -3297,10 +3297,10 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire ret void } @@ -3419,10 +3419,10 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst ret void } @@ -3541,10 +3541,10 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst ret void } @@ -3663,10 +3663,10 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst ret void } @@ -3785,10 +3785,10 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst ret void } @@ -3907,10 +3907,10 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst ret void } @@ -4053,12 +4053,12 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4201,12 +4201,12 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4349,12 +4349,12 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4497,12 +4497,12 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4645,12 +4645,12 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4793,12 +4793,12 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4941,12 +4941,12 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5089,12 +5089,12 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5237,12 +5237,12 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5385,12 +5385,12 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5533,12 +5533,12 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5681,12 +5681,12 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5829,12 +5829,12 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5977,12 +5977,12 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6125,12 +6125,12 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6259,10 +6259,10 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront-one-as") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6391,10 +6391,10 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6523,10 +6523,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront-one-as") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6655,10 +6655,10 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("wavefront-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6779,9 +6779,9 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") unordered, align 4 ret void } @@ -6902,9 +6902,9 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") monotonic, align 4 ret void } @@ -7025,9 +7025,9 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") release, align 4 ret void } @@ -7148,9 +7148,9 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("wavefront-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") seq_cst, align 4 ret void } @@ -7271,9 +7271,9 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") monotonic ret void } @@ -7394,9 +7394,9 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire ret void } @@ -7517,9 +7517,9 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") release ret void } @@ -7640,9 +7640,9 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel ret void } @@ -7763,9 +7763,9 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst ret void } @@ -7908,10 +7908,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8054,10 +8054,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8200,10 +8200,10 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("wavefront-one-as") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8322,10 +8322,10 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic ret void } @@ -8444,10 +8444,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic ret void } @@ -8566,10 +8566,10 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic ret void } @@ -8688,10 +8688,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic ret void } @@ -8810,10 +8810,10 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic ret void } @@ -8932,10 +8932,10 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire ret void } @@ -9054,10 +9054,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire ret void } @@ -9176,10 +9176,10 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire ret void } @@ -9298,10 +9298,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire ret void } @@ -9420,10 +9420,10 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire ret void } @@ -9542,10 +9542,10 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst ret void } @@ -9664,10 +9664,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst ret void } @@ -9786,10 +9786,10 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst ret void } @@ -9908,10 +9908,10 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst ret void } @@ -10030,10 +10030,10 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst ret void } @@ -10176,12 +10176,12 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10324,12 +10324,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10472,12 +10472,12 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10620,12 +10620,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10768,12 +10768,12 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -10916,12 +10916,12 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11064,12 +11064,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11212,12 +11212,12 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11360,12 +11360,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11508,12 +11508,12 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11656,12 +11656,12 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11804,12 +11804,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11952,12 +11952,12 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12100,12 +12100,12 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12248,12 +12248,12 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index bdfdfb3..f81c985 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -136,10 +136,10 @@ define amdgpu_kernel void @global_workgroup_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -268,10 +268,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -404,10 +404,10 @@ define amdgpu_kernel void @global_workgroup_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -545,10 +545,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -669,9 +669,9 @@ define amdgpu_kernel void @global_workgroup_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") unordered, align 4 ret void } @@ -792,9 +792,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") monotonic, align 4 ret void } @@ -928,9 +928,9 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4 ret void } @@ -1064,9 +1064,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") seq_cst, align 4 ret void } @@ -1187,9 +1187,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") monotonic ret void } @@ -1317,9 +1317,9 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire ret void } @@ -1453,9 +1453,9 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") release ret void } @@ -1596,9 +1596,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel ret void } @@ -1739,9 +1739,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst ret void } @@ -1888,10 +1888,10 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2051,10 +2051,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2214,10 +2214,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -2336,10 +2336,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic ret void } @@ -2465,10 +2465,10 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic ret void } @@ -2600,10 +2600,10 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic ret void } @@ -2742,10 +2742,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic ret void } @@ -2884,10 +2884,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic ret void } @@ -3013,10 +3013,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire ret void } @@ -3142,10 +3142,10 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire ret void } @@ -3284,10 +3284,10 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release acquire ret void } @@ -3426,10 +3426,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire ret void } @@ -3568,10 +3568,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire ret void } @@ -3710,10 +3710,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst ret void } @@ -3852,10 +3852,10 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst ret void } @@ -3994,10 +3994,10 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst ret void } @@ -4136,10 +4136,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst ret void } @@ -4278,10 +4278,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst ret void } @@ -4424,12 +4424,12 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4576,12 +4576,12 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4737,12 +4737,12 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -4902,12 +4902,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5067,12 +5067,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5219,12 +5219,12 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5371,12 +5371,12 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5536,12 +5536,12 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5701,12 +5701,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -5866,12 +5866,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6031,12 +6031,12 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6196,12 +6196,12 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6361,12 +6361,12 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6526,12 +6526,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6691,12 +6691,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -6825,10 +6825,10 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup-one-as") unordered, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") unordered, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -6957,10 +6957,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7093,10 +7093,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup-one-as") acquire, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") acquire, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7231,10 +7231,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(1)* %out) { + ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %val = load atomic i32, i32 addrspace(1)* %in syncscope("workgroup-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -7355,9 +7355,9 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") unordered, align 4 ret void } @@ -7478,9 +7478,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") monotonic, align 4 ret void } @@ -7607,9 +7607,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") release, align 4 ret void } @@ -7736,9 +7736,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(1)* %out) { + i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out syncscope("workgroup-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") seq_cst, align 4 ret void } @@ -7859,9 +7859,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") monotonic ret void } @@ -7989,9 +7989,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire ret void } @@ -8118,9 +8118,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") release ret void } @@ -8254,9 +8254,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel ret void } @@ -8390,9 +8390,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst ret void } @@ -8539,10 +8539,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") acquire - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8695,10 +8695,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") acq_rel - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8851,10 +8851,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in) { + ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("workgroup-one-as") seq_cst - store i32 %val, i32 addrspace(1)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -8973,10 +8973,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic ret void } @@ -9102,10 +9102,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic ret void } @@ -9230,10 +9230,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic ret void } @@ -9365,10 +9365,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic ret void } @@ -9500,10 +9500,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic ret void } @@ -9629,10 +9629,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire ret void } @@ -9758,10 +9758,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire ret void } @@ -9893,10 +9893,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire ret void } @@ -10028,10 +10028,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire ret void } @@ -10163,10 +10163,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire ret void } @@ -10298,10 +10298,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst ret void } @@ -10433,10 +10433,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst ret void } @@ -10568,10 +10568,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst ret void } @@ -10703,10 +10703,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst ret void } @@ -10838,10 +10838,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst ret void } @@ -10984,12 +10984,12 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11136,12 +11136,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11290,12 +11290,12 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11448,12 +11448,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11606,12 +11606,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11758,12 +11758,12 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -11910,12 +11910,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12068,12 +12068,12 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12226,12 +12226,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12384,12 +12384,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12542,12 +12542,12 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12700,12 +12700,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -12858,12 +12858,12 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13016,12 +13016,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } @@ -13174,12 +13174,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %out, i32 %in, i32 %old) { + ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out, align 4 + store i32 %val0, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll index 60a956e..64da9e5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll @@ -10,36 +10,36 @@ entry: ret void } -; CHECK: error: :0:0: in function invalid_load void (i32*, i32*): Unsupported non-inclusive atomic synchronization scope +; CHECK: error: :0:0: in function invalid_load void (ptr, ptr): Unsupported non-inclusive atomic synchronization scope define amdgpu_kernel void @invalid_load( - i32* %in, i32* %out) { + ptr %in, ptr %out) { entry: - %val = load atomic i32, i32* %in syncscope("invalid") seq_cst, align 4 - store i32 %val, i32* %out + %val = load atomic i32, ptr %in syncscope("invalid") seq_cst, align 4 + store i32 %val, ptr %out ret void } -; CHECK: error: :0:0: in function invalid_store void (i32, i32*): Unsupported non-inclusive atomic synchronization scope +; CHECK: error: :0:0: in function invalid_store void (i32, ptr): Unsupported non-inclusive atomic synchronization scope define amdgpu_kernel void @invalid_store( - i32 %in, i32* %out) { + i32 %in, ptr %out) { entry: - store atomic i32 %in, i32* %out syncscope("invalid") seq_cst, align 4 + store atomic i32 %in, ptr %out syncscope("invalid") seq_cst, align 4 ret void } -; CHECK: error: :0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported non-inclusive atomic synchronization scope +; CHECK: error: :0:0: in function invalid_cmpxchg void (ptr, i32, i32): Unsupported non-inclusive atomic synchronization scope define amdgpu_kernel void @invalid_cmpxchg( - i32* %out, i32 %in, i32 %old) { + ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32* %out, i32 4 - %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst + %gep = getelementptr i32, ptr %out, i32 4 + %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst ret void } -; CHECK: error: :0:0: in function invalid_rmw void (i32*, i32): Unsupported non-inclusive atomic synchronization scope +; CHECK: error: :0:0: in function invalid_rmw void (ptr, i32): Unsupported non-inclusive atomic synchronization scope define amdgpu_kernel void @invalid_rmw( - i32* %out, i32 %in) { + ptr %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("invalid") seq_cst + %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("invalid") seq_cst ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll index 017ce4e..6de04e3 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -133,10 +133,10 @@ define amdgpu_kernel void @local_agent_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -262,10 +262,10 @@ define amdgpu_kernel void @local_agent_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -395,10 +395,10 @@ define amdgpu_kernel void @local_agent_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -541,10 +541,10 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -648,9 +648,9 @@ define amdgpu_kernel void @local_agent_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") unordered, align 4 ret void } @@ -754,9 +754,9 @@ define amdgpu_kernel void @local_agent_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4 ret void } @@ -873,9 +873,9 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") release, align 4 ret void } @@ -992,9 +992,9 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4 ret void } @@ -1098,9 +1098,9 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") monotonic ret void } @@ -1217,9 +1217,9 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire ret void } @@ -1336,9 +1336,9 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") release ret void } @@ -1468,9 +1468,9 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel ret void } @@ -1600,9 +1600,9 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst ret void } @@ -1732,10 +1732,10 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1878,10 +1878,10 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2024,10 +2024,10 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2143,10 +2143,10 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic ret void } @@ -2275,10 +2275,10 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic ret void } @@ -2407,10 +2407,10 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release monotonic ret void } @@ -2552,10 +2552,10 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic ret void } @@ -2697,10 +2697,10 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic ret void } @@ -2829,10 +2829,10 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire ret void } @@ -2961,10 +2961,10 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire ret void } @@ -3106,10 +3106,10 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release acquire ret void } @@ -3251,10 +3251,10 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire ret void } @@ -3396,10 +3396,10 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire ret void } @@ -3541,10 +3541,10 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst ret void } @@ -3686,10 +3686,10 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst ret void } @@ -3831,10 +3831,10 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst ret void } @@ -3976,10 +3976,10 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst ret void } @@ -4121,10 +4121,10 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -4262,12 +4262,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4409,12 +4409,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4565,12 +4565,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4725,12 +4725,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4885,12 +4885,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5032,12 +5032,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5179,12 +5179,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5339,12 +5339,12 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5499,12 +5499,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5659,12 +5659,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5819,12 +5819,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5979,12 +5979,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6139,12 +6139,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6299,12 +6299,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6459,12 +6459,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6590,10 +6590,10 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent-one-as") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6719,10 +6719,10 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6848,10 +6848,10 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent-one-as") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6977,10 +6977,10 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("agent-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -7084,9 +7084,9 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") unordered, align 4 ret void } @@ -7190,9 +7190,9 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") monotonic, align 4 ret void } @@ -7296,9 +7296,9 @@ define amdgpu_kernel void @local_agent_one_as_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") release, align 4 ret void } @@ -7402,9 +7402,9 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("agent-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") seq_cst, align 4 ret void } @@ -7508,9 +7508,9 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") monotonic ret void } @@ -7614,9 +7614,9 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire ret void } @@ -7720,9 +7720,9 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") release ret void } @@ -7826,9 +7826,9 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel ret void } @@ -7932,9 +7932,9 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst ret void } @@ -8060,10 +8060,10 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8189,10 +8189,10 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8318,10 +8318,10 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("agent-one-as") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8437,10 +8437,10 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic ret void } @@ -8556,10 +8556,10 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic ret void } @@ -8675,10 +8675,10 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic ret void } @@ -8794,10 +8794,10 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic ret void } @@ -8913,10 +8913,10 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic ret void } @@ -9032,10 +9032,10 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire ret void } @@ -9151,10 +9151,10 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire ret void } @@ -9270,10 +9270,10 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire ret void } @@ -9389,10 +9389,10 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire ret void } @@ -9508,10 +9508,10 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire ret void } @@ -9627,10 +9627,10 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst ret void } @@ -9746,10 +9746,10 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst ret void } @@ -9865,10 +9865,10 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst ret void } @@ -9984,10 +9984,10 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst ret void } @@ -10103,10 +10103,10 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst ret void } @@ -10244,12 +10244,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10387,12 +10387,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10530,12 +10530,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10673,12 +10673,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10816,12 +10816,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10959,12 +10959,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11102,12 +11102,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11245,12 +11245,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11388,12 +11388,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11531,12 +11531,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11674,12 +11674,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11817,12 +11817,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11960,12 +11960,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12103,12 +12103,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12246,12 +12246,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll index f520637..130d16e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -153,10 +153,10 @@ define amdgpu_kernel void @local_nontemporal_load_0( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(1)* %out) { + ptr addrspace(3) %in, ptr addrspace(1) %out) { entry: - %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(3) %in, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -307,12 +307,12 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(1)* %out) { + ptr addrspace(3) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid - %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(3) %in, i32 %tid + %val = load i32, ptr addrspace(3) %val.gep, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -453,10 +453,10 @@ define amdgpu_kernel void @local_nontemporal_store_0( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(3)* %out) { + ptr addrspace(1) %in, ptr addrspace(3) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + store i32 %val, ptr addrspace(3) %out, !nontemporal !0 ret void } @@ -602,12 +602,12 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(3)* %out) { + ptr addrspace(1) %in, ptr addrspace(3) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid - store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tid + store i32 %val, ptr addrspace(3) %out.gep, !nontemporal !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll index 1713569..869ba86 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll @@ -133,10 +133,10 @@ define amdgpu_kernel void @local_singlethread_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -262,10 +262,10 @@ define amdgpu_kernel void @local_singlethread_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -391,10 +391,10 @@ define amdgpu_kernel void @local_singlethread_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -520,10 +520,10 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -627,9 +627,9 @@ define amdgpu_kernel void @local_singlethread_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") unordered, align 4 ret void } @@ -733,9 +733,9 @@ define amdgpu_kernel void @local_singlethread_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") monotonic, align 4 ret void } @@ -839,9 +839,9 @@ define amdgpu_kernel void @local_singlethread_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") release, align 4 ret void } @@ -945,9 +945,9 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") seq_cst, align 4 ret void } @@ -1051,9 +1051,9 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") monotonic ret void } @@ -1157,9 +1157,9 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire ret void } @@ -1263,9 +1263,9 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") release ret void } @@ -1369,9 +1369,9 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel ret void } @@ -1475,9 +1475,9 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst ret void } @@ -1603,10 +1603,10 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1732,10 +1732,10 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1861,10 +1861,10 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1980,10 +1980,10 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic ret void } @@ -2099,10 +2099,10 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic ret void } @@ -2218,10 +2218,10 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic ret void } @@ -2337,10 +2337,10 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic ret void } @@ -2456,10 +2456,10 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic ret void } @@ -2575,10 +2575,10 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire ret void } @@ -2694,10 +2694,10 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire ret void } @@ -2813,10 +2813,10 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release acquire ret void } @@ -2932,10 +2932,10 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire ret void } @@ -3051,10 +3051,10 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire ret void } @@ -3170,10 +3170,10 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst ret void } @@ -3289,10 +3289,10 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst ret void } @@ -3408,10 +3408,10 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst ret void } @@ -3527,10 +3527,10 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst ret void } @@ -3646,10 +3646,10 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst ret void } @@ -3787,12 +3787,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -3930,12 +3930,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4073,12 +4073,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4216,12 +4216,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4359,12 +4359,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4502,12 +4502,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4645,12 +4645,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4788,12 +4788,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4931,12 +4931,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5074,12 +5074,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5217,12 +5217,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5360,12 +5360,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5503,12 +5503,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5646,12 +5646,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5789,12 +5789,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5920,10 +5920,10 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6049,10 +6049,10 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6178,10 +6178,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6307,10 +6307,10 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6414,9 +6414,9 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") unordered, align 4 ret void } @@ -6520,9 +6520,9 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") monotonic, align 4 ret void } @@ -6626,9 +6626,9 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") release, align 4 ret void } @@ -6732,9 +6732,9 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") seq_cst, align 4 ret void } @@ -6838,9 +6838,9 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") monotonic ret void } @@ -6944,9 +6944,9 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire ret void } @@ -7050,9 +7050,9 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") release ret void } @@ -7156,9 +7156,9 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel ret void } @@ -7262,9 +7262,9 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst ret void } @@ -7390,10 +7390,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7519,10 +7519,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7648,10 +7648,10 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7767,10 +7767,10 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic ret void } @@ -7886,10 +7886,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic ret void } @@ -8005,10 +8005,10 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic ret void } @@ -8124,10 +8124,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic ret void } @@ -8243,10 +8243,10 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic ret void } @@ -8362,10 +8362,10 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire ret void } @@ -8481,10 +8481,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire ret void } @@ -8600,10 +8600,10 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire ret void } @@ -8719,10 +8719,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire ret void } @@ -8838,10 +8838,10 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire ret void } @@ -8957,10 +8957,10 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst ret void } @@ -9076,10 +9076,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst ret void } @@ -9195,10 +9195,10 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst ret void } @@ -9314,10 +9314,10 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst ret void } @@ -9433,10 +9433,10 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst ret void } @@ -9574,12 +9574,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -9717,12 +9717,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -9860,12 +9860,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10003,12 +10003,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10146,12 +10146,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10289,12 +10289,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10432,12 +10432,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10575,12 +10575,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10718,12 +10718,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10861,12 +10861,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11004,12 +11004,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11147,12 +11147,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11290,12 +11290,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11433,12 +11433,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11576,12 +11576,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll index da86bf7..d1f3459 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -133,10 +133,10 @@ define amdgpu_kernel void @local_system_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -262,10 +262,10 @@ define amdgpu_kernel void @local_system_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -395,10 +395,10 @@ define amdgpu_kernel void @local_system_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -541,10 +541,10 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -648,9 +648,9 @@ define amdgpu_kernel void @local_system_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out unordered, align 4 ret void } @@ -754,9 +754,9 @@ define amdgpu_kernel void @local_system_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out monotonic, align 4 ret void } @@ -873,9 +873,9 @@ define amdgpu_kernel void @local_system_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out release, align 4 + store atomic i32 %in, ptr addrspace(3) %out release, align 4 ret void } @@ -992,9 +992,9 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out seq_cst, align 4 ret void } @@ -1098,9 +1098,9 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in monotonic ret void } @@ -1217,9 +1217,9 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire ret void } @@ -1336,9 +1336,9 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in release ret void } @@ -1468,9 +1468,9 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel ret void } @@ -1600,9 +1600,9 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst ret void } @@ -1732,10 +1732,10 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1878,10 +1878,10 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2024,10 +2024,10 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2143,10 +2143,10 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic monotonic ret void } @@ -2275,10 +2275,10 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire monotonic ret void } @@ -2407,10 +2407,10 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release monotonic ret void } @@ -2552,10 +2552,10 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel monotonic ret void } @@ -2697,10 +2697,10 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst monotonic ret void } @@ -2829,10 +2829,10 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic acquire ret void } @@ -2961,10 +2961,10 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire acquire ret void } @@ -3106,10 +3106,10 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release acquire ret void } @@ -3251,10 +3251,10 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel acquire ret void } @@ -3396,10 +3396,10 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst acquire ret void } @@ -3541,10 +3541,10 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic seq_cst ret void } @@ -3686,10 +3686,10 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire seq_cst ret void } @@ -3831,10 +3831,10 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release seq_cst ret void } @@ -3976,10 +3976,10 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel seq_cst ret void } @@ -4121,10 +4121,10 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -4262,12 +4262,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4409,12 +4409,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4565,12 +4565,12 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4725,12 +4725,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4885,12 +4885,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5032,12 +5032,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5179,12 +5179,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5339,12 +5339,12 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5499,12 +5499,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5659,12 +5659,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5819,12 +5819,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5979,12 +5979,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6139,12 +6139,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6299,12 +6299,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6459,12 +6459,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6590,10 +6590,10 @@ define amdgpu_kernel void @local_system_one_as_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6719,10 +6719,10 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6848,10 +6848,10 @@ define amdgpu_kernel void @local_system_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6977,10 +6977,10 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -7084,9 +7084,9 @@ define amdgpu_kernel void @local_system_one_as_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") unordered, align 4 ret void } @@ -7190,9 +7190,9 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") monotonic, align 4 ret void } @@ -7296,9 +7296,9 @@ define amdgpu_kernel void @local_system_one_as_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") release, align 4 ret void } @@ -7402,9 +7402,9 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") seq_cst, align 4 ret void } @@ -7508,9 +7508,9 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") monotonic ret void } @@ -7614,9 +7614,9 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire ret void } @@ -7720,9 +7720,9 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") release ret void } @@ -7826,9 +7826,9 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel ret void } @@ -7932,9 +7932,9 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst ret void } @@ -8060,10 +8060,10 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8189,10 +8189,10 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8318,10 +8318,10 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8437,10 +8437,10 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic ret void } @@ -8556,10 +8556,10 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic ret void } @@ -8675,10 +8675,10 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic ret void } @@ -8794,10 +8794,10 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic ret void } @@ -8913,10 +8913,10 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic ret void } @@ -9032,10 +9032,10 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire ret void } @@ -9151,10 +9151,10 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire ret void } @@ -9270,10 +9270,10 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release acquire ret void } @@ -9389,10 +9389,10 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire ret void } @@ -9508,10 +9508,10 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire ret void } @@ -9627,10 +9627,10 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst ret void } @@ -9746,10 +9746,10 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst ret void } @@ -9865,10 +9865,10 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst ret void } @@ -9984,10 +9984,10 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst ret void } @@ -10103,10 +10103,10 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst ret void } @@ -10244,12 +10244,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10387,12 +10387,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10530,12 +10530,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10673,12 +10673,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10816,12 +10816,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10959,12 +10959,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11102,12 +11102,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11245,12 +11245,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11388,12 +11388,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11531,12 +11531,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11674,12 +11674,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11817,12 +11817,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11960,12 +11960,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12103,12 +12103,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12246,12 +12246,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index bdaec9b..9da601f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -101,10 +101,10 @@ define amdgpu_kernel void @local_volatile_load_0( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(1)* %out) { + ptr addrspace(3) %in, ptr addrspace(1) %out) { entry: - %val = load volatile i32, i32 addrspace(3)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load volatile i32, ptr addrspace(3) %in, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -207,12 +207,12 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(1)* %out) { + ptr addrspace(3) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid - %val = load volatile i32, i32 addrspace(3)* %val.gep, align 4 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(3) %in, i32 %tid + %val = load volatile i32, ptr addrspace(3) %val.gep, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -305,10 +305,10 @@ define amdgpu_kernel void @local_volatile_store_0( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(3)* %out) { + ptr addrspace(1) %in, ptr addrspace(3) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store volatile i32 %val, i32 addrspace(3)* %out + %val = load i32, ptr addrspace(1) %in, align 4 + store volatile i32 %val, ptr addrspace(3) %out ret void } @@ -406,12 +406,12 @@ define amdgpu_kernel void @local_volatile_store_1( ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(3)* %out) { + ptr addrspace(1) %in, ptr addrspace(3) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid - store volatile i32 %val, i32 addrspace(3)* %out.gep + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tid + store volatile i32 %val, ptr addrspace(3) %out.gep ret void } @@ -495,10 +495,10 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic volatile i32, i32 addrspace(3)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic volatile i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -575,9 +575,9 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic volatile i32 %in, i32 addrspace(3)* %out syncscope("workgroup") release, align 4 + store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll index b4289dd..212aec9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll @@ -133,10 +133,10 @@ define amdgpu_kernel void @local_wavefront_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -262,10 +262,10 @@ define amdgpu_kernel void @local_wavefront_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -391,10 +391,10 @@ define amdgpu_kernel void @local_wavefront_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -520,10 +520,10 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -627,9 +627,9 @@ define amdgpu_kernel void @local_wavefront_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") unordered, align 4 ret void } @@ -733,9 +733,9 @@ define amdgpu_kernel void @local_wavefront_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 4 ret void } @@ -839,9 +839,9 @@ define amdgpu_kernel void @local_wavefront_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") release, align 4 ret void } @@ -945,9 +945,9 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") seq_cst, align 4 ret void } @@ -1051,9 +1051,9 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") monotonic ret void } @@ -1157,9 +1157,9 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire ret void } @@ -1263,9 +1263,9 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") release ret void } @@ -1369,9 +1369,9 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel ret void } @@ -1475,9 +1475,9 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst ret void } @@ -1603,10 +1603,10 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1732,10 +1732,10 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1861,10 +1861,10 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1980,10 +1980,10 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic ret void } @@ -2099,10 +2099,10 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic ret void } @@ -2218,10 +2218,10 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic ret void } @@ -2337,10 +2337,10 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic ret void } @@ -2456,10 +2456,10 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic ret void } @@ -2575,10 +2575,10 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire ret void } @@ -2694,10 +2694,10 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire ret void } @@ -2813,10 +2813,10 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire ret void } @@ -2932,10 +2932,10 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire ret void } @@ -3051,10 +3051,10 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire ret void } @@ -3170,10 +3170,10 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst ret void } @@ -3289,10 +3289,10 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst ret void } @@ -3408,10 +3408,10 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst ret void } @@ -3527,10 +3527,10 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst ret void } @@ -3646,10 +3646,10 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst ret void } @@ -3787,12 +3787,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -3930,12 +3930,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4073,12 +4073,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4216,12 +4216,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4359,12 +4359,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4502,12 +4502,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4645,12 +4645,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4788,12 +4788,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4931,12 +4931,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5074,12 +5074,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5217,12 +5217,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5360,12 +5360,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5503,12 +5503,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5646,12 +5646,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5789,12 +5789,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5920,10 +5920,10 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront-one-as") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6049,10 +6049,10 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6178,10 +6178,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront-one-as") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6307,10 +6307,10 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("wavefront-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6414,9 +6414,9 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") unordered, align 4 ret void } @@ -6520,9 +6520,9 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") monotonic, align 4 ret void } @@ -6626,9 +6626,9 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") release, align 4 ret void } @@ -6732,9 +6732,9 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("wavefront-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") seq_cst, align 4 ret void } @@ -6838,9 +6838,9 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") monotonic ret void } @@ -6944,9 +6944,9 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire ret void } @@ -7050,9 +7050,9 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") release ret void } @@ -7156,9 +7156,9 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel ret void } @@ -7262,9 +7262,9 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst ret void } @@ -7390,10 +7390,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7519,10 +7519,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7648,10 +7648,10 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("wavefront-one-as") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -7767,10 +7767,10 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic ret void } @@ -7886,10 +7886,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic ret void } @@ -8005,10 +8005,10 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic ret void } @@ -8124,10 +8124,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic ret void } @@ -8243,10 +8243,10 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic ret void } @@ -8362,10 +8362,10 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire ret void } @@ -8481,10 +8481,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire ret void } @@ -8600,10 +8600,10 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire ret void } @@ -8719,10 +8719,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire ret void } @@ -8838,10 +8838,10 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire ret void } @@ -8957,10 +8957,10 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst ret void } @@ -9076,10 +9076,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst ret void } @@ -9195,10 +9195,10 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst ret void } @@ -9314,10 +9314,10 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst ret void } @@ -9433,10 +9433,10 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst ret void } @@ -9574,12 +9574,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -9717,12 +9717,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -9860,12 +9860,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10003,12 +10003,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10146,12 +10146,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10289,12 +10289,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10432,12 +10432,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10575,12 +10575,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10718,12 +10718,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10861,12 +10861,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11004,12 +11004,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11147,12 +11147,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11290,12 +11290,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11433,12 +11433,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11576,12 +11576,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll index e187fe3..d9a94f8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -133,10 +133,10 @@ define amdgpu_kernel void @local_workgroup_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -262,10 +262,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -395,10 +395,10 @@ define amdgpu_kernel void @local_workgroup_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -541,10 +541,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -648,9 +648,9 @@ define amdgpu_kernel void @local_workgroup_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") unordered, align 4 ret void } @@ -754,9 +754,9 @@ define amdgpu_kernel void @local_workgroup_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") monotonic, align 4 ret void } @@ -873,9 +873,9 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4 ret void } @@ -992,9 +992,9 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") seq_cst, align 4 ret void } @@ -1098,9 +1098,9 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") monotonic ret void } @@ -1217,9 +1217,9 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acquire ret void } @@ -1336,9 +1336,9 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") release ret void } @@ -1468,9 +1468,9 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acq_rel ret void } @@ -1600,9 +1600,9 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") seq_cst ret void } @@ -1732,10 +1732,10 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -1878,10 +1878,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2024,10 +2024,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -2143,10 +2143,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic ret void } @@ -2275,10 +2275,10 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic ret void } @@ -2407,10 +2407,10 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic ret void } @@ -2552,10 +2552,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic ret void } @@ -2697,10 +2697,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic ret void } @@ -2829,10 +2829,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire ret void } @@ -2961,10 +2961,10 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire ret void } @@ -3106,10 +3106,10 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release acquire ret void } @@ -3251,10 +3251,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire ret void } @@ -3396,10 +3396,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire ret void } @@ -3541,10 +3541,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst ret void } @@ -3686,10 +3686,10 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst ret void } @@ -3831,10 +3831,10 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst ret void } @@ -3976,10 +3976,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst ret void } @@ -4121,10 +4121,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst ret void } @@ -4262,12 +4262,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4409,12 +4409,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4565,12 +4565,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4725,12 +4725,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -4885,12 +4885,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5032,12 +5032,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5179,12 +5179,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5339,12 +5339,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5499,12 +5499,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5659,12 +5659,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5819,12 +5819,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -5979,12 +5979,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6139,12 +6139,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6299,12 +6299,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6459,12 +6459,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -6590,10 +6590,10 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup-one-as") unordered, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") unordered, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6719,10 +6719,10 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup-one-as") monotonic, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") monotonic, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6848,10 +6848,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup-one-as") acquire, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") acquire, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -6977,10 +6977,10 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %in, i32 addrspace(3)* %out) { + ptr addrspace(3) %in, ptr addrspace(3) %out) { entry: - %val = load atomic i32, i32 addrspace(3)* %in syncscope("workgroup-one-as") seq_cst, align 4 - store i32 %val, i32 addrspace(3)* %out + %val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") seq_cst, align 4 + store i32 %val, ptr addrspace(3) %out ret void } @@ -7084,9 +7084,9 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup-one-as") unordered, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") unordered, align 4 ret void } @@ -7190,9 +7190,9 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup-one-as") monotonic, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") monotonic, align 4 ret void } @@ -7296,9 +7296,9 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup-one-as") release, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") release, align 4 ret void } @@ -7402,9 +7402,9 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 %in, i32 addrspace(3)* %out) { + i32 %in, ptr addrspace(3) %out) { entry: - store atomic i32 %in, i32 addrspace(3)* %out syncscope("workgroup-one-as") seq_cst, align 4 + store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") seq_cst, align 4 ret void } @@ -7508,9 +7508,9 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") monotonic + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") monotonic ret void } @@ -7614,9 +7614,9 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") acquire + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acquire ret void } @@ -7720,9 +7720,9 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") release + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") release ret void } @@ -7826,9 +7826,9 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") acq_rel + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acq_rel ret void } @@ -7932,9 +7932,9 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") seq_cst + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") seq_cst ret void } @@ -8060,10 +8060,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") acquire - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acquire + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8189,10 +8189,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") acq_rel - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acq_rel + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8318,10 +8318,10 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in) { + ptr addrspace(3) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("workgroup-one-as") seq_cst - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") seq_cst + store i32 %val, ptr addrspace(3) %out, align 4 ret void } @@ -8437,10 +8437,10 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic ret void } @@ -8556,10 +8556,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic ret void } @@ -8675,10 +8675,10 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic ret void } @@ -8794,10 +8794,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic ret void } @@ -8913,10 +8913,10 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic ret void } @@ -9032,10 +9032,10 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire ret void } @@ -9151,10 +9151,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire ret void } @@ -9270,10 +9270,10 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire ret void } @@ -9389,10 +9389,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire ret void } @@ -9508,10 +9508,10 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire ret void } @@ -9627,10 +9627,10 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst ret void } @@ -9746,10 +9746,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst ret void } @@ -9865,10 +9865,10 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst ret void } @@ -9984,10 +9984,10 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst ret void } @@ -10103,10 +10103,10 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst ret void } @@ -10244,12 +10244,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10387,12 +10387,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10530,12 +10530,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10673,12 +10673,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10816,12 +10816,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -10959,12 +10959,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11102,12 +11102,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11245,12 +11245,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11388,12 +11388,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11531,12 +11531,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11674,12 +11674,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11817,12 +11817,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -11960,12 +11960,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12103,12 +12103,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } @@ -12246,12 +12246,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(3)* %out, i32 %in, i32 %old) { + ptr addrspace(3) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(3) %out, i32 4 + %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(3)* %out, align 4 + store i32 %val0, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index bdb60df..8cbaf57 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -177,10 +177,10 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(5)* %in, i32 addrspace(1)* %out) { + ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: - %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(5) %in, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -353,12 +353,12 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(5)* %in, i32 addrspace(1)* %out) { + ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid - %val = load i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid + %val = load i32, ptr addrspace(5) %val.gep, align 4, !nontemporal !0 + store i32 %val, ptr addrspace(1) %out ret void } @@ -526,10 +526,10 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; GFX11-CU-NEXT: scratch_store_b32 off, v0, s0 glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(5)* %out) { + ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store i32 %val, i32 addrspace(5)* %out, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + store i32 %val, ptr addrspace(5) %out, !nontemporal !0 ret void } @@ -702,12 +702,12 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX11-CU-NEXT: scratch_store_b32 v0, v1, s0 glc slc dlc ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(5)* %out) { + ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid - store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0 + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(5) %out, i32 %tid + store i32 %val, ptr addrspace(5) %out.gep, !nontemporal !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll index fb3ac32..fea679a 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -121,10 +121,10 @@ define amdgpu_kernel void @private_volatile_load_0( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(5)* %in, i32 addrspace(1)* %out) { + ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: - %val = load volatile i32, i32 addrspace(5)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out + %val = load volatile i32, ptr addrspace(5) %in, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -243,12 +243,12 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(5)* %in, i32 addrspace(1)* %out) { + ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid - %val = load volatile i32, i32 addrspace(5)* %val.gep, align 4 - store i32 %val, i32 addrspace(1)* %out + %val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid + %val = load volatile i32, ptr addrspace(5) %val.gep, align 4 + store i32 %val, ptr addrspace(1) %out ret void } @@ -371,10 +371,10 @@ define amdgpu_kernel void @private_volatile_store_0( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(5)* %out) { + ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: - %val = load i32, i32 addrspace(1)* %in, align 4 - store volatile i32 %val, i32 addrspace(5)* %out + %val = load i32, ptr addrspace(1) %in, align 4 + store volatile i32 %val, ptr addrspace(5) %out ret void } @@ -500,12 +500,12 @@ define amdgpu_kernel void @private_volatile_store_1( ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm - i32 addrspace(1)* %in, i32 addrspace(5)* %out) { + ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(1)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid - store volatile i32 %val, i32 addrspace(5)* %out.gep + %val = load i32, ptr addrspace(1) %in, align 4 + %out.gep = getelementptr inbounds i32, ptr addrspace(5) %out, i32 %tid + store volatile i32 %val, ptr addrspace(5) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll index c082766..5a3b64f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll @@ -10,23 +10,23 @@ target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64: ; GCN-LABEL: {{^}}_Z6brokenPd: ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} -define amdgpu_kernel void @_Z6brokenPd(double* %arg) { +define amdgpu_kernel void @_Z6brokenPd(ptr %arg) { bb: %tmp = alloca double, align 8, addrspace(5) %tmp1 = alloca double, align 8, addrspace(5) - %tmp2 = load double, double* %arg, align 8 + %tmp2 = load double, ptr %arg, align 8 br i1 1, label %bb6, label %bb4 bb3: ; No predecessors! br label %bb4 bb4: ; preds = %bb3, %bb - %tmp5 = phi double addrspace(5)* [ %tmp1, %bb3 ], [ %tmp, %bb ] - store double %tmp2, double addrspace(5)* %tmp5, align 8 + %tmp5 = phi ptr addrspace(5) [ %tmp1, %bb3 ], [ %tmp, %bb ] + store double %tmp2, ptr addrspace(5) %tmp5, align 8 br label %bb6 bb6: ; preds = %bb4, %bb %tmp7 = phi double [ 0x7FF8123000000000, %bb4 ], [ 0x7FF8000000000000, %bb ] - store double %tmp7, double* %arg, align 8 + store double %tmp7, ptr %arg, align 8 ret void } -- 2.7.4