From dbacea188b31f4deae2704282c166c03a00821da Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Tue, 4 Sep 2018 22:12:23 +0000 Subject: [PATCH] [LV] First order recurrence phis should not be treated as uniform This is a fix for PR38786. First order recurrence phis were incorrectly treated as uniform, which caused them to be vectorized as uniform instructions. Patch by Ayal Zaks and Orivej Desh! Reviewed by: Anna Differential Revision: https://reviews.llvm.org/D51639 llvm-svn: 341416 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++++ .../Transforms/LoopVectorize/X86/uniform-phi.ll | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 90f1155..7c1012f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4529,6 +4529,11 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // isOutOfScope operands cannot be uniform instructions. if (isOutOfScope(OV)) continue; + // First order recurrence Phi's should typically be considered + // non-uniform. + auto *OP = dyn_cast<PHINode>(OV); + if (OP && Legal->isFirstOrderRecurrence(OP)) + continue; // If all the users of the operand are uniform, then add the // operand into the uniform worklist. auto *OI = cast<Instruction>(OV); diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll index 881f29a..2be565e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll @@ -75,3 +75,25 @@ for.end: ; preds = %for.body ret i64 %retval } +; CHECK-LABEL: PR38786 +; Check that first order recurrence phis (%phi32 and %phi64) are not uniform. 
+; CHECK-NOT: LV: Found uniform instruction: %phi +define void @PR38786(double* %y, double* %x, i64 %n) { +entry: + br label %for.body + +for.body: + %phi32 = phi i32 [ 0, %entry ], [ %i32next, %for.body ] + %phi64 = phi i64 [ 0, %entry ], [ %i64next, %for.body ] + %i32next = add i32 %phi32, 1 + %i64next = zext i32 %i32next to i64 + %xip = getelementptr inbounds double, double* %x, i64 %i64next + %yip = getelementptr inbounds double, double* %y, i64 %phi64 + %xi = load double, double* %xip, align 8 + store double %xi, double* %yip, align 8 + %cmp = icmp slt i64 %i64next, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} -- 2.7.4