From 89c447e4e6b564636bbf32b15d67e40cf6c60387 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 15 Jan 2022 19:16:03 -0500 Subject: [PATCH] AMDGPU: Stop reserving 36-bytes before kernel arguments for amdpal This was inheriting the mesa behavior, and as far as I know nobody is using opencl kernels with amdpal. The isMesaKernel check was irrelevant because this property needs to be held for all functions. --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 14 +- .../CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll | 2 +- .../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 272 +++++----- .../CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll | 162 +++--- .../CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll | 184 +++---- .../CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll | 16 +- .../CodeGen/AMDGPU/GlobalISel/store-local.128.ll | 72 +-- .../CodeGen/AMDGPU/GlobalISel/store-local.96.ll | 72 +-- llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll | 12 +- llvm/test/CodeGen/AMDGPU/ds_read2.ll | 170 +++--- llvm/test/CodeGen/AMDGPU/ds_write2.ll | 96 ++-- llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 20 +- .../CodeGen/AMDGPU/memory-legalizer-flat-agent.ll | 352 ++++++------ .../AMDGPU/memory-legalizer-flat-nontemporal.ll | 8 +- .../AMDGPU/memory-legalizer-flat-singlethread.ll | 352 ++++++------ .../CodeGen/AMDGPU/memory-legalizer-flat-system.ll | 352 ++++++------ .../AMDGPU/memory-legalizer-flat-volatile.ll | 14 +- .../AMDGPU/memory-legalizer-flat-wavefront.ll | 348 ++++++------ .../AMDGPU/memory-legalizer-flat-workgroup.ll | 336 ++++++------ .../AMDGPU/memory-legalizer-global-agent.ll | 348 ++++++------ .../AMDGPU/memory-legalizer-global-nontemporal.ll | 8 +- .../AMDGPU/memory-legalizer-global-singlethread.ll | 352 ++++++------ .../AMDGPU/memory-legalizer-global-system.ll | 332 +++++------ .../AMDGPU/memory-legalizer-global-volatile.ll | 14 +- .../AMDGPU/memory-legalizer-global-wavefront.ll | 352 ++++++------ .../AMDGPU/memory-legalizer-global-workgroup.ll | 352 ++++++------ .../CodeGen/AMDGPU/memory-legalizer-local-agent.ll | 604 ++++++++++----------- .../AMDGPU/memory-legalizer-local-nontemporal.ll | 16 +- .../AMDGPU/memory-legalizer-local-singlethread.ll | 604 ++++++++++----------- .../AMDGPU/memory-legalizer-local-system.ll | 604 ++++++++++----------- .../AMDGPU/memory-legalizer-local-volatile.ll | 20 +- .../AMDGPU/memory-legalizer-local-wavefront.ll | 604 ++++++++++----------- .../AMDGPU/memory-legalizer-local-workgroup.ll | 604 ++++++++++----------- .../AMDGPU/memory-legalizer-private-nontemporal.ll | 16 +- .../AMDGPU/memory-legalizer-private-volatile.ll | 16 +- llvm/test/CodeGen/AMDGPU/store-local.128.ll | 96 ++-- llvm/test/CodeGen/AMDGPU/store-local.96.ll | 96 ++-- 37 files changed, 3802 insertions(+), 4090 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 88ed4b2..7f1b94b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -212,7 +212,19 @@ public: /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const Function &F) const { - return isAmdHsaOrMesa(F) ? 0 : 36; + switch (TargetTriple.getOS()) { + case Triple::AMDHSA: + case Triple::AMDPAL: + case Triple::Mesa3D: + return 0; + case Triple::UnknownOS: + default: + // For legacy reasons unknown/other is treated as a different version of + // mesa. + return 36; + } + + llvm_unreachable("invalid triple OS"); } /// \returns Maximum number of work groups per compute unit supported by the diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 452616e..baf0b4d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -5183,7 +5183,7 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C1]], [[C2]](s64) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 01d5307..3adb755 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -211,11 +211,11 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dword s5, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dword s5, s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -231,11 +231,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; ; GFX8-LABEL: test_div_fmas_f32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 +; GFX8-NEXT: s_load_dword s5, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -252,11 +252,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX10_W32-LABEL: test_div_fmas_f32: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x4 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 @@ -270,11 +270,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX10_W64-LABEL: test_div_fmas_f32: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x4 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 @@ -292,10 +292,10 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x25 -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -310,10 +310,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x94 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -329,10 +329,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -345,10 +345,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -365,10 +365,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX7-NEXT: s_load_dword s3, s[0:1], 0xd -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x16 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0xd +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -383,10 +383,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x34 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x8 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x10 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -402,10 +402,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x34 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x2c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x10 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -418,10 +418,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x34 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x2c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x10 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -438,10 +438,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -456,10 +456,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -475,10 +475,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -491,10 +491,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -511,8 +511,8 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x11 +; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x8 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s6 ; GFX7-NEXT: v_mov_b32_e32 v2, s8 @@ -531,8 +531,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; ; GFX8-LABEL: test_div_fmas_f64: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 -; GFX8-NEXT: s_load_dword s0, s[0:1], 0x44 +; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 +; GFX8-NEXT: s_load_dword s0, s[0:1], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s6 ; GFX8-NEXT: v_mov_b32_e32 v2, s8 @@ -552,8 +552,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX10_W32-LABEL: test_div_fmas_f64: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x44 -; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x20 +; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s2 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8 @@ -569,8 +569,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX10_W64-LABEL: test_div_fmas_f64: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x44 -; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x20 +; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s2 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8 @@ -590,8 +590,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_cmp_eq_u32 s7, 0 @@ -609,8 +609,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_cmp_eq_u32 s7, 0 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0 @@ -629,8 +629,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 @@ -646,8 +646,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 @@ -668,10 +668,10 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -685,11 +685,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, 0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -703,10 +703,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 @@ -719,10 +719,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 @@ -739,10 +739,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -756,11 +756,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, -1 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -774,10 +774,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 @@ -790,10 +790,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 @@ -810,8 +810,8 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX7-NEXT: s_load_dword s8, s[0:1], 0x15 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s8, s[0:1], 0xc ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 @@ -838,8 +838,8 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x54 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x30 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, s6 @@ -873,9 +873,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W32: ; %bb.0: -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x54 +; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc @@ -896,9 +896,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W64: ; %bb.0: -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x54 +; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc @@ -938,20 +938,20 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, [8 x i32], float addrspace(1)* %in, [8 x i32], i32 addrspace(1)* %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7-NEXT: s_cbranch_execz .LBB13_2 ; GFX7-NEXT: ; %bb.1: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x1d +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x14 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -971,7 +971,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_mov_b32 s6, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -980,12 +980,12 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2] -; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX8-NEXT: s_cbranch_execz .LBB13_2 ; GFX8-NEXT: ; %bb.1: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1008,18 +1008,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W32: ; %bb.0: ; %entry -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_mov_b32 s5, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3] ; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W32-NEXT: ; %bb.1: ; %bb -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) @@ -1038,18 +1038,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W64: ; %bb.0: ; %entry -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_mov_b32 s6, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3] ; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W64-NEXT: ; %bb.1: ; %bb -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 2ece383..fa0d862 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_arg_imm: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -64,9 +64,9 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_arg_imm_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -84,9 +84,9 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_imm_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -105,10 +105,10 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 { ; GFX6-LABEL: v_bfe_print_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80002 @@ -125,9 +125,9 @@ define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -144,8 +144,8 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 8 @@ -161,10 +161,10 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -183,10 +183,10 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -205,10 +205,10 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -227,10 +227,10 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f @@ -247,10 +247,10 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001 @@ -267,10 +267,10 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180008 @@ -287,10 +287,10 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80018 @@ -307,10 +307,10 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s3, s3, 31 @@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, 31 @@ -349,7 +349,7 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -365,7 +365,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0x302e, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -381,7 +381,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -397,7 +397,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -413,7 +413,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -429,7 +429,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x10007 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -446,7 +446,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -463,7 +463,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -480,7 +480,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80006 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -497,7 +497,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80010 ; GFX6-NEXT: s_bfe_i32 s2, 0x10000, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -514,7 +514,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_i32 s2, 0xffff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -531,7 +531,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x40004 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -548,7 +548,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -565,7 +565,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_i32 s2, 0x1fffe, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -582,7 +582,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -599,7 +599,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_15: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -616,7 +616,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x70001 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -632,7 +632,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_17: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -649,7 +649,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_18: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -666,10 +666,10 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_sext_in_reg_i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000 @@ -690,8 +690,8 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 ; GFX6-LABEL: simplify_demanded_bfe_sdiv: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0 -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 @@ -731,10 +731,10 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_0_width: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 8 @@ -751,11 +751,11 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s4, 0x80000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, s4 @@ -774,10 +774,10 @@ define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000 @@ -797,10 +797,10 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1) define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_16_bfe_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000 @@ -820,9 +820,9 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1) define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, s4 @@ -843,9 +843,9 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, s4 @@ -866,12 +866,12 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -890,12 +890,12 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -914,10 +914,10 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000 @@ -937,10 +937,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 @@ -960,10 +960,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index 821d0fd..782d0b9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s2, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb +; GFX6-NEXT: s_load_dword s2, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -65,9 +65,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_imm: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -85,9 +85,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_imm_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -105,9 +105,9 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_imm_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -126,9 +126,9 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -145,8 +145,8 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 8 @@ -162,12 +162,12 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zextload_i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -184,10 +184,10 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -208,10 +208,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -232,10 +232,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -256,10 +256,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -280,10 +280,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -304,10 +304,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000 @@ -348,10 +348,10 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -370,10 +370,10 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -392,10 +392,10 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000 @@ -415,10 +415,10 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000 @@ -438,10 +438,10 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -460,10 +460,10 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -482,10 +482,10 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -504,10 +504,10 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f @@ -524,10 +524,10 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1f0001 @@ -544,10 +544,10 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x180008 @@ -564,10 +564,10 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80018 @@ -585,10 +585,10 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s3, s3, 31 @@ -606,10 +606,10 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, 31 @@ -627,7 +627,7 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -643,7 +643,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0x302e, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -659,7 +659,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -675,7 +675,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -691,7 +691,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -707,7 +707,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x10007 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -724,7 +724,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -741,7 +741,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -758,7 +758,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80006 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -775,7 +775,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80010 ; GFX6-NEXT: s_bfe_u32 s2, 0x10000, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -792,7 +792,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_u32 s2, 0xffff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -809,7 +809,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x40004 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -826,7 +826,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -843,7 +843,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_u32 s2, 0x1fffe, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -860,7 +860,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -877,7 +877,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_15: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -894,7 +894,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x70001 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -910,7 +910,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_17: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -927,7 +927,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_18: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -948,13 +948,13 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s8, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s8, s8, 63 @@ -977,8 +977,8 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: lshr_and: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -995,9 +995,9 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: v_lshr_and: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, s4 @@ -1015,8 +1015,8 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -1033,8 +1033,8 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -1051,8 +1051,8 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: shl_lshr: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x150002 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll index 34db791..7e5ecaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -213,7 +213,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) { define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; GFX7-LABEL: mulu24_shl64: ; GFX7: ; %bb.0: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX7-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, 0 @@ -226,7 +226,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX8-LABEL: mulu24_shl64: ; GFX8: ; %bb.0: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -241,7 +241,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX9-LABEL: mulu24_shl64: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -256,7 +256,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX10-LABEL: mulu24_shl64: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, 7, v0 @@ -281,7 +281,7 @@ bb: define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) { ; GFX7-LABEL: muli24_shl64: ; GFX7: ; %bb.0: ; %bb -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s6, 0 @@ -303,7 +303,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX8-LABEL: muli24_shl64: ; GFX8: ; %bb.0: ; %bb -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 3, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -326,7 +326,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX9-LABEL: muli24_shl64: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -341,7 +341,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX10-LABEL: muli24_shl64: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll index 244325b..b662b21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll @@ -9,8 +9,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -22,8 +22,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX7-LABEL: store_lds_v4i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -37,8 +37,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX10-LABEL: store_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -54,8 +54,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -111,8 +111,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s1, 0x80008 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -166,8 +166,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -227,8 +227,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshr_b32 s0, s4, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -255,8 +255,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_lshr_b32 s1, s4, 16 @@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -317,8 +317,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -331,8 +331,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -347,8 +347,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -365,8 +365,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -378,8 +378,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -393,8 +393,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -423,8 +423,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -438,8 +438,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll index 7246858..d19684b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll @@ -9,8 +9,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-NEXT: v_mov_b32_e32 v1, s13 @@ -21,8 +21,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX7-LABEL: store_lds_v3i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s12 @@ -35,8 +35,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX10-LABEL: store_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s12 ; GFX10-NEXT: v_mov_b32_e32 v1, s13 @@ -51,8 +51,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -96,8 +96,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s1, 0x80008 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_lshr_b32 s1, s4, 16 @@ -189,8 +189,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshr_b32 s0, s4, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -212,8 +212,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_lshr_b32 s1, s4, 16 @@ -237,8 +237,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -264,8 +264,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -277,8 +277,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -292,8 +292,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -337,8 +337,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -354,8 +354,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-NEXT: v_mov_b32_e32 v1, s13 @@ -366,8 +366,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s12 @@ -380,8 +380,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s12 ; GFX10-NEXT: v_mov_b32_e32 v1, s13 diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll index bd173aa..41d33f9 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -44,7 +44,7 @@ entry: define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 { ; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; CI: ; %bb.0: ; %entry -; CI-NEXT: s_load_dword s0, s[0:1], 0x9 +; CI-NEXT: s_load_dword s0, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 ; CI-NEXT: s_mov_b64 vcc, 0 @@ -64,7 +64,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0, v0 @@ -81,7 +81,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b @@ -291,7 +291,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 { ; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s0, s[0:1], 0x9 +; CI-NEXT: s_load_dword s0, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0x3fb, v0 ; CI-NEXT: s_mov_b64 vcc, 0 @@ -312,7 +312,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0x3fb, v0 @@ -330,7 +330,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 260c638..c44c597 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -15,7 +15,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -28,7 +28,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -51,7 +51,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:255 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -64,7 +64,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:255 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -88,7 +88,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:1028 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -102,7 +102,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:1028 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -126,7 +126,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -142,7 +142,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8 ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -184,7 +184,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_barrier ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: v_add_f32_e32 v1, v1, v2 ; CI-NEXT: s_mov_b32 s2, 0 @@ -202,7 +202,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_barrier ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -245,7 +245,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:2 offset1:8 ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -261,7 +261,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset0:2 offset1:8 ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -301,9 +301,9 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: read2_ptr_is_subreg_arg_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 @@ -320,9 +320,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, ; ; GFX9-LABEL: read2_ptr_is_subreg_arg_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, s3 @@ -354,9 +354,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: read2_ptr_is_subreg_arg_offset_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 @@ -373,9 +373,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1) ; ; GFX9-LABEL: read2_ptr_is_subreg_arg_offset_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, s3 @@ -410,7 +410,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -423,7 +423,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -453,7 +453,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:32 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -467,7 +467,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -491,7 +491,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:32 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -505,7 +505,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -526,10 +526,10 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -565,8 +565,8 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a ; ; GFX9-ALIGNED-LABEL: unaligned_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s4, v0 @@ -594,9 +594,9 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a ; ; GFX9-UNALIGNED-LABEL: unaligned_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add_u32_e32 v0, s2, v2 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 @@ -619,10 +619,10 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_offset_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -658,8 +658,8 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, ; ; GFX9-ALIGNED-LABEL: unaligned_offset_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s4, v0 @@ -687,9 +687,9 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, ; ; GFX9-UNALIGNED-LABEL: unaligned_offset_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add3_u32 v0, s2, v2, 5 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 @@ -715,10 +715,10 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_2_simple_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -741,9 +741,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou ; ; GFX9-ALIGNED-LABEL: misaligned_2_simple_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-ALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s2, v0 ; GFX9-ALIGNED-NEXT: ds_read_u16 v2, v1 @@ -760,9 +760,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou ; ; GFX9-UNALIGNED-LABEL: misaligned_2_simple_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add_u32_e32 v0, s2, v2 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 @@ -788,7 +788,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_mov_b32_e32 v5, 0 @@ -801,7 +801,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -824,7 +824,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_mov_b32_e32 v5, 0 @@ -837,7 +837,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -861,7 +861,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[1:2], v0 ; CI-NEXT: ds_read_b64 v[3:4], v0 offset:2056 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -875,7 +875,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read_b64 v[0:1], v4 ; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:2056 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -896,10 +896,10 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_read2_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v3, vcc, s2, v0 @@ -914,9 +914,9 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl ; ; GFX9-LABEL: misaligned_read2_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, s2, v4 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:1 @@ -945,7 +945,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[0:1], v0 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -957,7 +957,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: ds_read_b64 v[0:1], v2 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -975,7 +975,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:2 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -987,7 +987,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:2 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -1007,7 +1007,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b128 v[0:3], v0 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1020,7 +1020,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: ds_read_b128 v[0:3], v4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -1042,7 +1042,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[0:1], v2 offset:16384 ; CI-NEXT: ds_read_b64 v[2:3], v2 offset:32760 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1056,7 +1056,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: ds_read_b64 v[0:1], v4 offset:16384 ; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:32760 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -1075,7 +1075,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { ; CI-LABEL: sgemm_inner_loop_read2_sequence: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_lshl_b32 s0, s2, 2 ; CI-NEXT: s_add_i32 s1, s0, 0xc20 ; CI-NEXT: s_addk_i32 s0, 0xc60 @@ -1123,7 +1123,7 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* % ; GFX9-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX9-NEXT: s_waitcnt lgkmcnt(2) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v6 @@ -1179,8 +1179,8 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* % define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { ; CI-LABEL: misaligned_read2_v2i32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1193,8 +1193,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, ; ; GFX9-LABEL: misaligned_read2_v2i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -1210,8 +1210,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { ; CI-LABEL: misaligned_read2_i64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1224,8 +1224,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr ; ; GFX9-LABEL: misaligned_read2_i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -1241,8 +1241,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr define amdgpu_kernel void @ds_read_diff_base_interleaving( ; CI-LABEL: ds_read_diff_base_interleaving: ; CI: ; %bb.0: ; %bb -; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; CI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 @@ -1272,10 +1272,10 @@ define amdgpu_kernel void @ds_read_diff_base_interleaving( ; ; GFX9-LABEL: ds_read_diff_base_interleaving: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, s4, v1 @@ -1348,10 +1348,10 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_add_u32 s40, s40, s11 ; CI-NEXT: s_mov_b64 s[10:11], s[6:7] -; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; CI-NEXT: s_load_dword s6, s[4:5], 0xb +; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 +; CI-NEXT: s_load_dword s6, s[4:5], 0x2 ; CI-NEXT: s_addc_u32 s41, s41, 0 -; CI-NEXT: s_add_u32 s8, s4, 48 +; CI-NEXT: s_add_u32 s8, s4, 12 ; CI-NEXT: s_addc_u32 s9, s5, 0 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 @@ -1393,9 +1393,9 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; GFX9-NEXT: s_add_u32 s36, s36, s11 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX9-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 -; GFX9-NEXT: s_add_u32 s8, s4, 48 +; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GFX9-NEXT: s_add_u32 s8, s4, 12 ; GFX9-NEXT: s_addc_u32 s9, s5, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 @@ -1487,7 +1487,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; CI-NEXT: ds_read_u8 v6, v0 offset:66 ; CI-NEXT: ds_read_u8 v0, v0 offset:65 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_or_b32_e32 v1, v2, v1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v6 @@ -1514,7 +1514,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; GFX9-ALIGNED-NEXT: ds_read_u8 v8, v2 offset:71 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(7) ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v7 @@ -1531,7 +1531,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; GFX9-UNALIGNED-LABEL: read2_v2i32_align1_odd_offset: ; GFX9-UNALIGNED: ; %bb.0: ; %entry ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x41 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index 501282d..268e8bb 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -9,7 +9,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_one_val_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -23,7 +23,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo ; ; GFX9-LABEL: simple_write2_one_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -44,7 +44,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -60,7 +60,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo ; ; GFX9-LABEL: simple_write2_two_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -85,7 +85,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_volatile_0: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -105,7 +105,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_f32_volatile_0: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -131,7 +131,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_volatile_1: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -151,7 +151,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_f32_volatile_1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -182,7 +182,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg2_mixed_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 @@ -199,7 +199,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa ; ; GFX9-LABEL: simple_write2_two_val_subreg2_mixed_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ; kill: killed $vgpr4 @@ -229,7 +229,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 @@ -244,7 +244,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_subreg2_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -268,7 +268,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg4_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v0 @@ -283,7 +283,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_subreg4_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -307,7 +307,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_max_offset_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -323,7 +323,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_max_offset_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -348,7 +348,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_too_far_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -368,7 +368,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_too_far_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -394,7 +394,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_x2: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, ; ; GFX9-LABEL: simple_write2_two_val_f32_x2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -450,7 +450,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_x2_nonzero_base: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -469,7 +469,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs ; ; GFX9-LABEL: simple_write2_two_val_f32_x2_nonzero_base: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -506,8 +506,8 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: write2_ptr_subreg_arg_two_val_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xf +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x6 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -529,8 +529,8 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* ; ; GFX9-LABEL: write2_ptr_subreg_arg_two_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x3c +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x18 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[4:5] @@ -566,7 +566,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -580,7 +580,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do ; ; GFX9-LABEL: simple_write2_one_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] @@ -601,8 +601,8 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; CI-NEXT: s_load_dword s0, s[0:1], 0xd +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; CI-NEXT: s_load_dword s0, s[0:1], 0x4 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -618,8 +618,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace ; ; GFX9-LABEL: misaligned_simple_write2_one_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -642,8 +642,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_offset_simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; CI-NEXT: s_load_dword s0, s[0:1], 0xd +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; CI-NEXT: s_load_dword s0, s[0:1], 0x4 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -675,8 +675,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add ; ; GFX9-ALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -702,8 +702,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add ; ; GFX9-UNALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -731,7 +731,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -747,7 +747,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do ; ; GFX9-LABEL: simple_write2_two_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] glc @@ -873,7 +873,7 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 { ; CI-LABEL: write2_sgemm_sequence: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -895,7 +895,7 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld ; ; GFX9-LABEL: write2_sgemm_sequence: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x10 ; GFX9-NEXT: s_lshl_b32 s2, s2, 2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -950,8 +950,8 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_v4f32_superreg_align4: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; CI-NEXT: s_load_dword s4, s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; CI-NEXT: s_load_dword s4, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -968,8 +968,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs ; ; GFX9-ALIGNED-LABEL: simple_write2_v4f32_superreg_align4: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_lshl_add_u32 v0, v0, 4, s4 ; GFX9-ALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -984,8 +984,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs ; ; GFX9-UNALIGNED-LABEL: simple_write2_v4f32_superreg_align4: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_lshl_add_u32 v4, v0, 4, s4 ; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 09e2e90..abb1204 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -262,7 +262,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -289,7 +289,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15 @@ -893,7 +893,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -923,7 +923,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX1010-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) @@ -951,7 +951,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX1030-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -1665,7 +1665,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -1695,7 +1695,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX1010-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) @@ -1723,7 +1723,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX1030-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -2329,7 +2329,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -2354,7 +2354,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX10-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, s0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index ff90763..4ef2355 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -320,7 +320,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -406,8 +406,8 @@ define amdgpu_kernel void @flat_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -478,8 +478,8 @@ define amdgpu_kernel void @flat_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -555,8 +555,8 @@ define amdgpu_kernel void @flat_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -635,8 +635,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -710,8 +710,8 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -792,8 +792,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -874,8 +874,8 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -964,8 +964,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1059,8 +1059,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1150,8 +1150,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1347,8 +1347,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1440,8 +1440,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1535,8 +1535,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1630,8 +1630,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1733,8 +1733,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1841,8 +1841,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1944,8 +1944,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2044,8 +2044,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2149,8 +2149,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2257,8 +2257,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2365,8 +2365,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2473,8 +2473,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2581,8 +2581,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2689,8 +2689,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2905,8 +2905,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -3010,8 +3010,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3122,8 +3122,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3236,8 +3236,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3356,8 +3356,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3478,8 +3478,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3595,8 +3595,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3709,8 +3709,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3828,8 +3828,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3950,8 +3950,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4072,8 +4072,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4194,8 +4194,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4316,8 +4316,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4438,8 +4438,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4560,8 +4560,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4682,8 +4682,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4777,7 +4777,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4862,7 +4862,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4955,7 +4955,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5057,7 +5057,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5145,8 +5145,8 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5294,8 +5294,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5374,8 +5374,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5449,8 +5449,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5529,8 +5529,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5611,8 +5611,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5699,8 +5699,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5792,8 +5792,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5886,8 +5886,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5987,8 +5987,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6091,8 +6091,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6185,8 +6185,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6278,8 +6278,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6373,8 +6373,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6474,8 +6474,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6580,8 +6580,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6681,8 +6681,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6779,8 +6779,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6882,8 +6882,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6988,8 +6988,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7200,8 +7200,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7306,8 +7306,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7412,8 +7412,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7518,8 +7518,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7624,8 +7624,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7729,8 +7729,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7844,8 +7844,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7960,8 +7960,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8083,8 +8083,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8210,8 +8210,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8332,8 +8332,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8451,8 +8451,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8575,8 +8575,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8702,8 +8702,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8829,8 +8829,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8956,8 +8956,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9083,8 +9083,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9210,8 +9210,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9337,8 +9337,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9464,8 +9464,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll index f8b7bd9a..ab79a4c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -137,7 +137,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -230,7 +230,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -319,7 +319,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll index ce46d51..49f2733 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1891,8 +1891,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3439,8 +3439,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4171,7 +4171,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4795,8 +4795,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5504,8 +5504,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5929,8 +5929,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6099,8 +6099,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8204,8 +8204,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 20003772..b9a3070 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -322,7 +322,7 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -410,8 +410,8 @@ define amdgpu_kernel void @flat_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -482,8 +482,8 @@ define amdgpu_kernel void @flat_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -559,8 +559,8 @@ define amdgpu_kernel void @flat_system_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -641,8 +641,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -718,8 +718,8 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -800,8 +800,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -884,8 +884,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -976,8 +976,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1075,8 +1075,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1170,8 +1170,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1269,8 +1269,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1373,8 +1373,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1470,8 +1470,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1565,8 +1565,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1662,8 +1662,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1767,8 +1767,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1879,8 +1879,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1986,8 +1986,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2088,8 +2088,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2195,8 +2195,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2307,8 +2307,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2419,8 +2419,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2531,8 +2531,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2643,8 +2643,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2755,8 +2755,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2867,8 +2867,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2979,8 +2979,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -3088,8 +3088,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3200,8 +3200,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3316,8 +3316,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3438,8 +3438,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3564,8 +3564,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3685,8 +3685,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3801,8 +3801,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3922,8 +3922,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4048,8 +4048,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4174,8 +4174,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4300,8 +4300,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4426,8 +4426,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4552,8 +4552,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4678,8 +4678,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4804,8 +4804,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4903,7 +4903,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4988,7 +4988,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5081,7 +5081,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5185,7 +5185,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5275,8 +5275,8 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5347,8 +5347,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5424,8 +5424,8 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5506,8 +5506,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5583,8 +5583,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5663,8 +5663,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5747,8 +5747,8 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5837,8 +5837,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5934,8 +5934,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6032,8 +6032,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6135,8 +6135,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6243,8 +6243,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6341,8 +6341,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6434,8 +6434,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6531,8 +6531,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6634,8 +6634,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6744,8 +6744,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6849,8 +6849,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6949,8 +6949,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7054,8 +7054,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7164,8 +7164,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7274,8 +7274,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7384,8 +7384,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7494,8 +7494,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7604,8 +7604,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7714,8 +7714,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7824,8 +7824,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7933,8 +7933,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8048,8 +8048,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8166,8 +8166,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8291,8 +8291,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8422,8 +8422,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8548,8 +8548,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8669,8 +8669,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8795,8 +8795,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8926,8 +8926,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9057,8 +9057,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9188,8 +9188,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9319,8 +9319,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9450,8 +9450,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9581,8 +9581,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9712,8 +9712,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll index 397ab8b5..88a5ccb 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -116,7 +116,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -183,7 +183,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -250,7 +250,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 @@ -317,7 +317,7 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -377,8 +377,8 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll index 47c0cbc..1569af9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_wavefront_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1891,8 +1891,8 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3439,8 +3439,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4171,7 +4171,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4795,8 +4795,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5504,8 +5504,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5929,8 +5929,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6099,8 +6099,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index 179de4d..ead08ed 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -221,7 +221,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -316,7 +316,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -403,8 +403,8 @@ define amdgpu_kernel void @flat_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -475,8 +475,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -551,8 +551,8 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -630,8 +630,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -705,8 +705,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -782,8 +782,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -862,8 +862,8 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -946,8 +946,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1034,8 +1034,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1120,8 +1120,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1211,8 +1211,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1305,8 +1305,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1397,8 +1397,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1487,8 +1487,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1580,8 +1580,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1677,8 +1677,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1778,8 +1778,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1875,8 +1875,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1969,8 +1969,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2067,8 +2067,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2168,8 +2168,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2269,8 +2269,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2370,8 +2370,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2474,8 +2474,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2584,8 +2584,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2697,8 +2697,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2814,8 +2814,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2933,8 +2933,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3048,8 +3048,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3160,8 +3160,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3276,8 +3276,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3395,8 +3395,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3514,8 +3514,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3633,8 +3633,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3752,8 +3752,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3871,8 +3871,8 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3990,8 +3990,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4109,8 +4109,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4204,7 +4204,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4289,7 +4289,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4376,7 +4376,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4466,7 +4466,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4549,8 +4549,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4621,8 +4621,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4695,8 +4695,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4770,8 +4770,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4843,8 +4843,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4917,8 +4917,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4993,8 +4993,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5070,8 +5070,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5149,8 +5149,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5232,8 +5232,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5322,8 +5322,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5413,8 +5413,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5503,8 +5503,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5590,8 +5590,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5679,8 +5679,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5769,8 +5769,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5861,8 +5861,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5951,8 +5951,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6040,8 +6040,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6131,8 +6131,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6223,8 +6223,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6315,8 +6315,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6407,8 +6407,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6499,8 +6499,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6591,8 +6591,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6683,8 +6683,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6775,8 +6775,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6875,8 +6875,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6984,8 +6984,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7319,8 +7319,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7430,8 +7430,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7540,8 +7540,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7652,8 +7652,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7765,8 +7765,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7878,8 +7878,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7991,8 +7991,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8104,8 +8104,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8217,8 +8217,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8330,8 +8330,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8443,8 +8443,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index d57b42d..43120f5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -244,7 +244,7 @@ define amdgpu_kernel void @global_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -346,7 +346,7 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -435,8 +435,8 @@ define amdgpu_kernel void @global_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -516,8 +516,8 @@ define amdgpu_kernel void @global_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -603,8 +603,8 @@ define amdgpu_kernel void @global_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -693,8 +693,8 @@ define amdgpu_kernel void @global_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -777,8 +777,8 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -868,8 +868,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -960,8 +960,8 @@ define amdgpu_kernel void @global_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1060,8 +1060,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1165,8 +1165,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1268,8 +1268,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1378,8 +1378,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1491,8 +1491,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1590,8 +1590,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1689,8 +1689,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1789,8 +1789,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1897,8 +1897,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2010,8 +2010,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2117,8 +2117,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2221,8 +2221,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2331,8 +2331,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2444,8 +2444,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2557,8 +2557,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2670,8 +2670,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2783,8 +2783,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2896,8 +2896,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3009,8 +3009,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3122,8 +3122,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3229,8 +3229,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3342,8 +3342,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3579,8 +3579,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3703,8 +3703,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3936,8 +3936,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4057,8 +4057,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4181,8 +4181,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4305,8 +4305,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4429,8 +4429,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4677,8 +4677,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4801,8 +4801,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4925,8 +4925,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5025,7 +5025,7 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5115,7 +5115,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5211,7 +5211,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5313,7 +5313,7 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5483,8 +5483,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5835,8 +5835,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6027,8 +6027,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6132,8 +6132,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6235,8 +6235,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6345,8 +6345,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6557,8 +6557,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6656,8 +6656,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6756,8 +6756,8 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6864,8 +6864,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6977,8 +6977,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7084,8 +7084,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7188,8 +7188,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7298,8 +7298,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7411,8 +7411,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7524,8 +7524,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7637,8 +7637,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7750,8 +7750,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7863,8 +7863,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7976,8 +7976,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8089,8 +8089,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8196,8 +8196,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8309,8 +8309,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8430,8 +8430,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8554,8 +8554,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8672,8 +8672,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8787,8 +8787,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8908,8 +8908,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9032,8 +9032,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9156,8 +9156,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9280,8 +9280,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9404,8 +9404,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9528,8 +9528,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9652,8 +9652,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9776,8 +9776,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll index d7be355..0261671 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -155,7 +155,7 @@ define amdgpu_kernel void @global_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 0 @@ -251,7 +251,7 @@ define amdgpu_kernel void @global_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -345,7 +345,7 @@ define amdgpu_kernel void @global_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll index d881b3d..949b5a5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -238,7 +238,7 @@ define amdgpu_kernel void @global_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -328,7 +328,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -414,8 +414,8 @@ define amdgpu_kernel void @global_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -576,8 +576,8 @@ define amdgpu_kernel void @global_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -657,8 +657,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -738,8 +738,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -819,8 +819,8 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -900,8 +900,8 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -981,8 +981,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2327,8 +2327,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2996,8 +2996,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3745,8 +3745,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7420,8 +7420,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7741,8 +7741,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index da895c6..93a58c1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -244,7 +244,7 @@ define amdgpu_kernel void @global_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -348,7 +348,7 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -439,8 +439,8 @@ define amdgpu_kernel void @global_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -520,8 +520,8 @@ define amdgpu_kernel void @global_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -607,8 +607,8 @@ define amdgpu_kernel void @global_system_release_store( ; ; SKIP-CACHE-INV-LABEL: global_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -699,8 +699,8 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -785,8 +785,8 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -876,8 +876,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -970,8 +970,8 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1072,8 +1072,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1181,8 +1181,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1288,8 +1288,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1400,8 +1400,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1517,8 +1517,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1620,8 +1620,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1719,8 +1719,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1821,8 +1821,8 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1931,8 +1931,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2048,8 +2048,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2159,8 +2159,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2265,8 +2265,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2377,8 +2377,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2494,8 +2494,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2611,8 +2611,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2728,8 +2728,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2839,8 +2839,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2952,8 +2952,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3075,8 +3075,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3203,8 +3203,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3325,8 +3325,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3442,8 +3442,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3693,8 +3693,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3949,8 +3949,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4077,8 +4077,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4205,8 +4205,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_relese_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4333,8 +4333,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4461,8 +4461,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4565,7 +4565,7 @@ define amdgpu_kernel void @global_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4655,7 +4655,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4751,7 +4751,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4855,7 +4855,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4946,8 +4946,8 @@ define amdgpu_kernel void @global_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5027,8 +5027,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5114,8 +5114,8 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5206,8 +5206,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5292,8 +5292,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5383,8 +5383,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5477,8 +5477,8 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5579,8 +5579,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5688,8 +5688,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5795,8 +5795,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5907,8 +5907,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6024,8 +6024,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6127,8 +6127,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6226,8 +6226,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6328,8 +6328,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6438,8 +6438,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6666,8 +6666,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6772,8 +6772,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6884,8 +6884,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7001,8 +7001,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7118,8 +7118,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7235,8 +7235,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7352,8 +7352,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7469,8 +7469,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7586,8 +7586,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7703,8 +7703,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7814,8 +7814,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7927,8 +7927,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8044,8 +8044,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8168,8 +8168,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8296,8 +8296,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8418,8 +8418,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8535,8 +8535,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8658,8 +8658,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8786,8 +8786,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8914,8 +8914,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9042,8 +9042,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9170,8 +9170,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9298,8 +9298,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9426,8 +9426,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9554,8 +9554,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll index b2ee94f..dacc965 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll @@ -56,7 +56,7 @@ define amdgpu_kernel void @global_volatile_load_0( ; ; SKIP-CACHE-INV-LABEL: global_volatile_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -132,7 +132,7 @@ define amdgpu_kernel void @global_volatile_load_1( ; ; SKIP-CACHE-INV-LABEL: global_volatile_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 0 @@ -210,7 +210,7 @@ define amdgpu_kernel void @global_volatile_store_0( ; ; SKIP-CACHE-INV-LABEL: global_volatile_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -287,7 +287,7 @@ define amdgpu_kernel void @global_volatile_store_1( ; ; SKIP-CACHE-INV-LABEL: global_volatile_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -361,7 +361,7 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -432,8 +432,8 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll index 5c67427..3fe2c7b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -238,7 +238,7 @@ define amdgpu_kernel void @global_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -328,7 +328,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -414,8 +414,8 @@ define amdgpu_kernel void @global_wavefront_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -576,8 +576,8 @@ define amdgpu_kernel void @global_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -657,8 +657,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -738,8 +738,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -819,8 +819,8 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -900,8 +900,8 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -981,8 +981,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2327,8 +2327,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2996,8 +2996,8 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3745,8 +3745,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7420,8 +7420,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7741,8 +7741,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index 2f36a6d9..339cb98 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -239,7 +239,7 @@ define amdgpu_kernel void @global_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -334,7 +334,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -422,8 +422,8 @@ define amdgpu_kernel void @global_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -503,8 +503,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -589,8 +589,8 @@ define amdgpu_kernel void @global_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -678,8 +678,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -762,8 +762,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -845,8 +845,8 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -933,8 +933,8 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1024,8 +1024,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1117,8 +1117,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1212,8 +1212,8 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1315,8 +1315,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1421,8 +1421,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1519,8 +1519,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1610,8 +1610,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1706,8 +1706,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1805,8 +1805,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1906,8 +1906,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2002,8 +2002,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2095,8 +2095,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2193,8 +2193,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2294,8 +2294,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2395,8 +2395,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2496,8 +2496,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2597,8 +2597,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2698,8 +2698,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2799,8 +2799,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2900,8 +2900,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3004,8 +3004,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3341,8 +3341,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3570,8 +3570,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3679,8 +3679,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3793,8 +3793,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3910,8 +3910,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4027,8 +4027,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4144,8 +4144,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4261,8 +4261,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4378,8 +4378,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4495,8 +4495,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4612,8 +4612,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4711,7 +4711,7 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4801,7 +4801,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4892,7 +4892,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4985,7 +4985,7 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5072,8 +5072,8 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5153,8 +5153,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5236,8 +5236,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5320,8 +5320,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5485,8 +5485,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5656,8 +5656,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5837,8 +5837,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5937,8 +5937,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6038,8 +6038,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6134,8 +6134,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6225,8 +6225,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6318,8 +6318,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6412,8 +6412,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6508,8 +6508,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6602,8 +6602,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6695,8 +6695,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6790,8 +6790,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6886,8 +6886,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7078,8 +7078,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7174,8 +7174,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7270,8 +7270,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7366,8 +7366,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7462,8 +7462,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7564,8 +7564,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7672,8 +7672,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7782,8 +7782,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7893,8 +7893,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8005,8 +8005,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8115,8 +8115,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8224,8 +8224,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8335,8 +8335,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8447,8 +8447,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8559,8 +8559,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8671,8 +8671,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8783,8 +8783,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8895,8 +8895,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9007,8 +9007,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9119,8 +9119,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll index b3a3059..a328fd3 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll index 664d69d..df3e522 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -64,8 +64,8 @@ define amdgpu_kernel void @local_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -165,8 +165,8 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 @@ -265,8 +265,8 @@ define amdgpu_kernel void @local_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -363,8 +363,8 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll index 8462450..67ce190 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -233,7 +233,7 @@ define amdgpu_kernel void @local_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -321,7 +321,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -401,7 +401,7 @@ define amdgpu_kernel void @local_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -474,7 +474,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -547,7 +547,7 @@ define amdgpu_kernel void @local_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -620,7 +620,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -693,7 +693,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -766,7 +766,7 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -839,7 +839,7 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -912,7 +912,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -985,7 +985,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1736,13 +1731,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1818,13 +1812,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2760,13 +2743,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2858,13 +2840,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3642,13 +3616,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3740,13 +3713,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4118,7 +4088,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4812,7 +4782,7 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6733,13 +6686,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6831,13 +6783,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll index 64129cd..4efd46d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_system_release_store( ; ; SKIP-CACHE-INV-LABEL: local_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_system_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index 2f47903..97aced3 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -62,8 +62,8 @@ define amdgpu_kernel void @local_volatile_load_0( ; ; SKIP-CACHE-INV-LABEL: local_volatile_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -139,8 +139,8 @@ define amdgpu_kernel void @local_volatile_load_1( ; ; SKIP-CACHE-INV-LABEL: local_volatile_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 @@ -215,8 +215,8 @@ define amdgpu_kernel void @local_volatile_store_0( ; ; SKIP-CACHE-INV-LABEL: local_volatile_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -289,8 +289,8 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; SKIP-CACHE-INV-LABEL: local_volatile_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -360,7 +360,7 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -423,7 +423,7 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll index 3fde622..046325f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -233,7 +233,7 @@ define amdgpu_kernel void @local_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -321,7 +321,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -401,7 +401,7 @@ define amdgpu_kernel void @local_wavefront_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -474,7 +474,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -547,7 +547,7 @@ define amdgpu_kernel void @local_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -620,7 +620,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -693,7 +693,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -766,7 +766,7 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -839,7 +839,7 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -912,7 +912,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -985,7 +985,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1736,13 +1731,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1818,13 +1812,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2760,13 +2743,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2858,13 +2840,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3642,13 +3616,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3740,13 +3713,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4118,7 +4088,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4812,7 +4782,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6733,13 +6686,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6831,13 +6783,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll index 7094634..580d7a8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index 2af31ad..0fd1e7a 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -81,8 +81,8 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 @@ -208,8 +208,8 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_add_i32_e32 v0, vcc, s2, v0 @@ -336,8 +336,8 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -462,8 +462,8 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll index c4c60fe..f605514 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -81,8 +81,8 @@ define amdgpu_kernel void @private_volatile_load_0( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 @@ -178,8 +178,8 @@ define amdgpu_kernel void @private_volatile_load_1( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_add_i32_e32 v0, vcc, s2, v0 @@ -280,8 +280,8 @@ define amdgpu_kernel void @private_volatile_store_0( ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -381,8 +381,8 @@ define amdgpu_kernel void @private_volatile_store_1( ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll index c432d9b..d63c789 100644 --- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll @@ -7,8 +7,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -20,8 +20,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX7-LABEL: store_lds_v4i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 @@ -34,8 +34,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX6-LABEL: store_lds_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -49,8 +49,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX10-LABEL: store_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 @@ -66,8 +66,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 @@ -110,8 +110,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -163,8 +163,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -217,8 +217,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s7 @@ -265,8 +265,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 @@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -314,8 +314,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -344,8 +344,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s7 @@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 @@ -382,8 +382,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -397,8 +397,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -413,8 +413,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 @@ -431,8 +431,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -444,8 +444,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 @@ -458,8 +458,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -473,8 +473,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 @@ -490,8 +490,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -503,8 +503,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 @@ -517,8 +517,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -532,8 +532,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll index 913b7e4..816ab02 100644 --- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll @@ -7,8 +7,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -19,8 +19,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX7-LABEL: store_lds_v3i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -32,8 +32,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX6-LABEL: store_lds_v3i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -47,8 +47,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX10-LABEL: store_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -63,8 +63,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 @@ -98,8 +98,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -183,8 +183,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s6 @@ -222,8 +222,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 @@ -239,8 +239,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -263,8 +263,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -288,8 +288,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s6 @@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 @@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -336,8 +336,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -351,8 +351,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 @@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s6 @@ -381,8 +381,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, s4 @@ -395,8 +395,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s6 @@ -427,8 +427,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -439,8 +439,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -452,8 +452,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -467,8 +467,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -- 2.7.4