From: Matt Arsenault Date: Tue, 2 Oct 2018 03:50:56 +0000 (+0000) Subject: AMDGPU: Expand atomicrmw nand in IR X-Git-Tag: llvmorg-8.0.0-rc1~7448 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ab41193312ff0fd06648c1bad63f02d36578dfb8;p=platform%2Fupstream%2Fllvm.git AMDGPU: Expand atomicrmw nand in IR llvm-svn: 343559 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3b0026a..ae6b925 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4477,3 +4477,10 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return false; } } + +TargetLowering::AtomicExpansionKind +AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + if (RMW->getOperation() == AtomicRMWInst::Nand) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::None; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index a231265..92d8991 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -307,6 +307,8 @@ public: MVT getFenceOperandTy(const DataLayout &DL) const override { return MVT::i32; } + + AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; }; namespace AMDGPUISD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f3fe0ae..f6ebc3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -602,6 +602,7 @@ void AMDGPUPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); + addPass(createAtomicExpandPass()); addPass(createAMDGPULowerIntrinsicsPass()); if (TM.getTargetTriple().getArch() == Triple::r600 || diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll new file mode 100644 index 0000000..7af3341 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind { +; GCN-LABEL: atomic_nand_i32_lds: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: ds_read_b32 v2, v0 +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: BB0_1: ; %atomicrmw.start +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_not_b32_e32 v1, v2 +; GCN-NEXT: v_or_b32_e32 v1, -5, v1 +; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GCN-NEXT: v_mov_b32_e32 v2, v1 +; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GCN-NEXT: s_cbranch_execnz BB0_1 +; GCN-NEXT: ; %bb.2: ; %atomicrmw.end +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst + ret i32 %result +} + +define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind { +; GCN-LABEL: atomic_nand_i32_global: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: global_load_dword v3, v[0:1], off +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: BB1_1: ; %atomicrmw.start +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_not_b32_e32 v2, v3 +; GCN-NEXT: v_or_b32_e32 v2, -5, v2 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_wbinvl1_vol +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GCN-NEXT: s_cbranch_execnz BB1_1 +; GCN-NEXT: ; %bb.2: ; %atomicrmw.end +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw nand i32 addrspace(1)* %ptr, i32 4 seq_cst + ret i32 %result +} + +define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind { +; GCN-LABEL: atomic_nand_i32_flat: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_load_dword v3, v[0:1] +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: BB2_1: ; %atomicrmw.start +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_not_b32_e32 v2, v3 +; GCN-NEXT: v_or_b32_e32 v2, -5, v2 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_wbinvl1_vol +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GCN-NEXT: s_cbranch_execnz BB2_1 +; GCN-NEXT: ; %bb.2: ; %atomicrmw.end +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw nand i32* %ptr, i32 4 seq_cst + ret i32 %result +} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-nand.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-nand.ll new file mode 100644 index 0000000..95e28fa --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-nand.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -atomic-expand %s | FileCheck %s +; RUN: opt -mtriple=r600-mesa-mesa3d -S -atomic-expand %s | FileCheck %s + +define i32 @test_atomicrmw_nand_i32_flat(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_atomicrmw_nand_i32_flat( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret i32 [[NEWLOADED]] +; + %res = atomicrmw nand i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +define i32 @test_atomicrmw_nand_i32_global(i32 addrspace(1)* %ptr, i32 %value) { +; CHECK-LABEL: @test_atomicrmw_nand_i32_global( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32 addrspace(1)* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret i32 [[NEWLOADED]] +; + %res = atomicrmw nand i32 addrspace(1)* %ptr, i32 %value seq_cst + ret i32 %res +} + +define i32 @test_atomicrmw_nand_i32_local(i32 addrspace(3)* %ptr, i32 %value) { +; CHECK-LABEL: @test_atomicrmw_nand_i32_local( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32 addrspace(3)* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret i32 [[NEWLOADED]] +; + %res = atomicrmw nand i32 addrspace(3)* %ptr, i32 %value seq_cst + ret i32 %res +} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/lit.local.cfg b/llvm/test/Transforms/AtomicExpand/AMDGPU/lit.local.cfg new file mode 100644 index 0000000..ec718bb --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True