From ef160de3e5af3c8e51928fbe7b096af3d9471880 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 16 Mar 2016 20:14:33 +0000 Subject: [PATCH] AMDGPU: Prevent uniform loops from becoming infinite Summary: Uniform loops where the branch leaving the loop is predicated on VCCNZ must be skipped if EXEC = 0, otherwise they will be infinite. Reviewers: tstellarAMD, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18137 llvm-svn: 263658 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 6 +++++ .../AMDGPU/uniform-loop-inside-nonuniform.ll | 28 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 7dd0d7b..2a645d1 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -137,6 +137,12 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From, NumInstr < SkipThreshold && I != E; ++I) { if (I->isBundle() || !I->isBundled()) + // When a uniform loop is inside non-uniform control flow, the branch + // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken + // when EXEC = 0. We should skip the loop lest it becomes infinite. + if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ) + return true; + if (++NumInstr >= SkipThreshold) return true; } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll new file mode 100644 index 0000000..26927e4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -0,0 +1,28 @@ +;RUN: llc -march=amdgcn -mcpu=verde < %s | FileCheck %s --check-prefix=CHECK + +; Test a simple uniform loop that lives inside non-uniform control flow. + +;CHECK-LABEL: {{^}}test1: +;CHECK: s_cbranch_execz +;CHECK: %loop_body +define void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) #0 { +main_body: + %cc = icmp eq i32 %p, 0 + br i1 %cc, label %out, label %loop_body + +loop_body: + %counter = phi i32 [ 0, %main_body ], [ %incr, %loop_body ] + + ; Prevent the loop from being optimized out + call void asm sideeffect "", "" () + + %incr = add i32 %counter, 1 + %lc = icmp sge i32 %incr, 1000 + br i1 %lc, label %out, label %loop_body + +out: + ret void +} + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readonly } -- 2.7.4