From c34900e1335d490bf6f16fba55eacd4ecf831f72 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Wed, 28 Apr 2021 13:11:44 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Fix selection of image intrinsics with
 unused return

When the return value of an atomic image intrinsic is unused, the
register class for the destination of the sub-register copy of that
return value ends up not being set, and the copy later hits the
'Register class not set' assert. If the return value has uses, the
register class is determined by the use instruction. Fix this by not
creating the sub-register copy when the image intrinsic's destination
has no uses; dead-mi-elimination would delete the copy later anyway.

Differential Revision: https://reviews.llvm.org/D101448
---
 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp    |   6 +-
 .../GlobalISel/llvm.amdgcn.image.atomic.dim.ll     |  88 +++++++++++
 .../GlobalISel/llvm.amdgcn.image.atomic.dim.mir    | 170 +++++++++++++++++++++
 3 files changed, 262 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 792c48f..99aa6db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1647,8 +1647,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
       unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
 
       MIB.addDef(TmpReg);
-      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
-          .addReg(TmpReg, RegState::Kill, SubReg);
+      if (!MRI->use_empty(VDataOut)) {
+        BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
+            .addReg(TmpReg, RegState::Kill, SubReg);
+      }
     } else {
       MIB.addDef(VDataOut); // vdata output
     }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
index 59d059d..a617ed9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -628,6 +628,50 @@ main_body:
   ret float %out
 }
 
+define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
+; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s0, s2
+; GFX6-NEXT:    s_mov_b32 s1, s3
+; GFX6-NEXT:    s_mov_b32 s2, s4
+; GFX6-NEXT:    s_mov_b32 s3, s5
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s7
+; GFX6-NEXT:    s_mov_b32 s6, s8
+; GFX6-NEXT:    s_mov_b32 s7, s9
+; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT:    s_endpgm
+;
+; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
+; GFX8:       ; %bb.0: ; %main_body
+; GFX8-NEXT:    s_mov_b32 s0, s2
+; GFX8-NEXT:    s_mov_b32 s1, s3
+; GFX8-NEXT:    s_mov_b32 s2, s4
+; GFX8-NEXT:    s_mov_b32 s3, s5
+; GFX8-NEXT:    s_mov_b32 s4, s6
+; GFX8-NEXT:    s_mov_b32 s5, s7
+; GFX8-NEXT:    s_mov_b32 s6, s8
+; GFX8-NEXT:    s_mov_b32 s7, s9
+; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s0, s2
+; GFX10-NEXT:    s_mov_b32 s1, s3
+; GFX10-NEXT:    s_mov_b32 s2, s4
+; GFX10-NEXT:    s_mov_b32 s3, s5
+; GFX10-NEXT:    s_mov_b32 s4, s6
+; GFX10-NEXT:    s_mov_b32 s5, s7
+; GFX10-NEXT:    s_mov_b32 s6, s8
+; GFX10-NEXT:    s_mov_b32 s7, s9
+; GFX10-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT:    s_endpgm
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
 ; GFX6-LABEL: atomic_add_i32_2d:
 ; GFX6:       ; %bb.0: ; %main_body
@@ -1636,6 +1680,50 @@ main_body:
   ret <2 x float> %out
 }
 
+define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
+; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s0, s2
+; GFX6-NEXT:    s_mov_b32 s1, s3
+; GFX6-NEXT:    s_mov_b32 s2, s4
+; GFX6-NEXT:    s_mov_b32 s3, s5
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s7
+; GFX6-NEXT:    s_mov_b32 s6, s8
+; GFX6-NEXT:    s_mov_b32 s7, s9
+; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX6-NEXT:    s_endpgm
+;
+; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
+; GFX8:       ; %bb.0: ; %main_body
+; GFX8-NEXT:    s_mov_b32 s0, s2
+; GFX8-NEXT:    s_mov_b32 s1, s3
+; GFX8-NEXT:    s_mov_b32 s2, s4
+; GFX8-NEXT:    s_mov_b32 s3, s5
+; GFX8-NEXT:    s_mov_b32 s4, s6
+; GFX8-NEXT:    s_mov_b32 s5, s7
+; GFX8-NEXT:    s_mov_b32 s6, s8
+; GFX8-NEXT:    s_mov_b32 s7, s9
+; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s0, s2
+; GFX10-NEXT:    s_mov_b32 s1, s3
+; GFX10-NEXT:    s_mov_b32 s2, s4
+; GFX10-NEXT:    s_mov_b32 s3, s5
+; GFX10-NEXT:    s_mov_b32 s4, s6
+; GFX10-NEXT:    s_mov_b32 s5, s7
+; GFX10-NEXT:    s_mov_b32 s6, s8
+; GFX10-NEXT:    s_mov_b32 s7, s9
+; GFX10-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT:    s_endpgm
+main_body:
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
 ; GFX6-LABEL: atomic_add_i64_2d:
 ; GFX6:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
new file mode 100644
index 0000000..4351cdb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -0,0 +1,170 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+---
+name: atomic_cmpswap_i32_1d
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_cmpswap_i32_1d
+    ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0
+    ; GFX6: $vgpr0 = COPY [[COPY3]]
+    ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ; GFX8-LABEL: name: atomic_cmpswap_i32_1d
+    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0
+    ; GFX8: $vgpr0 = COPY [[COPY3]]
+    ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ; GFX10-LABEL: name: atomic_cmpswap_i32_1d
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0
+    ; GFX10: $vgpr0 = COPY [[COPY3]]
+    ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    $vgpr0 = COPY %3(s32)
+    SI_RETURN_TO_EPILOG implicit $vgpr0
+...
+
+---
+name: atomic_cmpswap_i32_1d_no_return
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return
+    ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX6: S_ENDPGM 0
+    ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
+    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX8: S_ENDPGM 0
+    ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    ; GFX10: S_ENDPGM 0
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 4 on custom "ImageResource")
+    S_ENDPGM 0
+...
+
+---
+name: atomic_cmpswap_i64_1d
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    ; GFX6-LABEL: name: atomic_cmpswap_i64_1d
+    ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1
+    ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
+    ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ; GFX8-LABEL: name: atomic_cmpswap_i64_1d
+    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1
+    ; GFX8: $vgpr0_vgpr1 = COPY [[COPY3]]
+    ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ; GFX10-LABEL: name: atomic_cmpswap_i64_1d
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX10: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1
+    ; GFX10: $vgpr0_vgpr1 = COPY [[COPY3]]
+    ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %2:vgpr(s32) = COPY $vgpr4
+    %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    $vgpr0_vgpr1 = COPY %3(s64)
+    SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+...
+
+---
+name: atomic_cmpswap_i64_1d_no_return
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    ; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return
+    ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX6: S_ENDPGM 0
+    ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
+    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX8: S_ENDPGM 0
+    ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    ; GFX10: S_ENDPGM 0
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %2:vgpr(s32) = COPY $vgpr4
+    %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 8 on custom "ImageResource")
+    S_ENDPGM 0
+...
-- 
2.7.4
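Note: for reviewers who want to reproduce the crash without the full test file, below is a minimal standalone .ll sketch distilled from the new atomic_cmpswap_i32_1d_no_return test above. The function name is illustrative and the llc flags are borrowed from the MIR test's RUN lines; the existing .ll test's own RUN lines are not shown in this diff.

; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s
; The atomic's result is deliberately unused: with no use instruction to
; determine a register class for the destination of the sub-register copy,
; instruction selection previously hit the 'Register class not set' assert.
; With this patch the copy is simply not emitted, and the function compiles
; to a plain image_atomic_cmpswap followed by s_endpgm.
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg)

define amdgpu_ps void @cmpswap_result_unused(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
main_body:
  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}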