From: Stanislav Mekhanoshin Date: Tue, 15 Nov 2016 19:00:15 +0000 (+0000) Subject: [AMDGPU] Add wave barrier builtin X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ea91cca593bb543edce86c15c2948ef71b022932;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Add wave barrier builtin The wave barrier is a discardable barrier. Its main purpose is to carry the convergent attribute, thus preventing illegal CFG optimizations. With SIMT, all lanes in a wave reach the convergence point simultaneously, so no special instruction is needed in the ISA. The barrier is discarded during code generation. Differential Revision: https://reviews.llvm.org/D26585 llvm-svn: 287007 --- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index cc4fd4c..5105e0d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -107,6 +107,9 @@ def int_amdgcn_dispatch_id : def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; +def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">, + Intrinsic<[], [], [IntrConvergent]>; + def int_amdgcn_s_waitcnt : Intrinsic<[], [llvm_i32_ty], []>; def int_amdgcn_div_scale : Intrinsic< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index cb259a3..7d56355 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -196,6 +196,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) { + if (isVerbose()) + OutStreamer->emitRawComment(" wave barrier"); + return; + } + MCInst TmpInst; MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 17b3265..2214360 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3492,6 +3492,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (DescSize != 0 && DescSize != 4) return DescSize; + if (Opc == AMDGPU::WAVE_BARRIER) + return 0; + // 4-byte instructions may have a 32-bit literal encoded after them. Check // operands that coud ever be literals. if (isVALU(MI) || isSALU(MI)) { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 15f3ac5..423599d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -137,6 +137,17 @@ def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst), let isTerminator = 1; } +def WAVE_BARRIER : SPseudoInstSI<(outs), (ins), + [(int_amdgcn_wave_barrier)]> { + let SchedRW = []; + let hasNoSchedulingInfo = 1; + let hasSideEffects = 1; + let mayLoad = 1; + let mayStore = 1; + let isBarrier = 1; + let isConvergent = 1; +} + // SI pseudo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll new file mode 100644 index 0000000..e851797 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test_wave_barrier: +; GCN-DAG: ; wave barrier +; GCN-NOT: s_barrier + +define void @test_wave_barrier() #0 { +entry: + call void @llvm.amdgcn.wave.barrier() #1 + ret void +} + +declare void @llvm.amdgcn.wave.barrier() #1 + +attributes #0 = { nounwind } +attributes #1 = { convergent nounwind }