From: Paul Walker
Date: Mon, 17 Oct 2022 13:45:09 +0000 (+0100)
Subject: [InstCombine] Bubble vector.reverse of compare operands to their result.
X-Git-Tag: upstream/17.0.6~22955
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=362c52ad5a90d4f1c2fb01f3f5989ca4dc13855f;p=platform%2Fupstream%2Fllvm.git

[InstCombine] Bubble vector.reverse of compare operands to their result.

This mirrors a similar shufflevector transformation so that the same
effect is obtained for scalable vectors. The transformation is only
performed when it can be proven that the number of resulting reversals
is not increased. By bubbling the reversals from operand to result this
should typically be the case, and ideally leads to back-to-back shuffles
that can be eliminated entirely.

Differential Revision: https://reviews.llvm.org/D139340
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4877a5e..0430c40 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -6091,6 +6092,31 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
   const CmpInst::Predicate Pred = Cmp.getPredicate();
   Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
   Value *V1, *V2;
+
+  auto createCmpReverse = [&](CmpInst::Predicate Pred, Value *X, Value *Y) {
+    Value *V = Builder.CreateCmp(Pred, X, Y, Cmp.getName());
+    if (auto *I = dyn_cast<Instruction>(V))
+      I->copyIRFlags(&Cmp);
+    Module *M = Cmp.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2)
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse()))
+      return createCmpReverse(Pred, V1, V2);
+
+    // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat)
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createCmpReverse(Pred, V1, RHS);
+  }
+  // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2)
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createCmpReverse(Pred, LHS, V2);
+
   ArrayRef<int> M;
   if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M))))
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
index 11d4c02..cab6be3 100644
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -173,9 +173,8 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
 
 define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -188,9 +187,9 @@ define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 define <vscale x 4 x i1> @icmp_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse_1(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -203,10 +202,10 @@ define <vscale x 4 x i1> @icmp_reverse_1(<vscale x 4 x i32> %a,
 
 define <vscale x 4 x i1> @icmp_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse_2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -236,10 +235,10 @@ define <vscale x 4 x i1> @icmp_reverse_3(<vscale x 4 x i32> %a,
 
 define <vscale x 4 x i1> @icmp_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @icmp_reverse_splat_RHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -269,10 +268,10 @@ define <vscale x 4 x i1> @icmp_reverse_splat_RHS_1(<vscale x 4 x i32> %a, i32 %b
 
 define <vscale x 4 x i1> @icmp_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @icmp_reverse_splat_LHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -656,15 +655,12 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float>
 
 define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: @reverse_fcmp_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A_REV]], [[B_REV]]
-; CHECK-NEXT:    [[CMP_REV:%.*]] = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP]])
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP1]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %b.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
-  %cmp = fcmp olt <vscale x 4 x float> %a.rev, %b.rev
+  %cmp = fcmp fast olt <vscale x 4 x float> %a.rev, %b.rev
  %cmp.rev = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %cmp)
  ret <vscale x 4 x i1> %cmp.rev
 }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
index 34e2f34..58c5410 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -10,7 +10,7 @@
 ; The test checks if the mask is being correctly created, reverted and used
 
-; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \
+; RUN: opt -passes=loop-vectorize,dce -mtriple aarch64-linux-gnu -S \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -20,9 +20,11 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
-; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[WIDEMSKLOAD]]
+; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
+; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
+; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
 ; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[FADD]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
+; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:
   %cmp7 = icmp sgt i64 %N, 0
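
For illustration only, here is a minimal standalone IR sketch of the fold added above. The function names @rev_cmp_before and @rev_cmp_after are made up and do not appear in the patch or its tests; with this patch applied, running the first function through opt -passes=instcombine is expected to produce something equivalent to the second:

define <vscale x 4 x i1> @rev_cmp_before(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
  ; Both compare operands are reversed, so two reverses are executed.
  %x.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %x)
  %y.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %y)
  %cmp = icmp slt <vscale x 4 x i32> %x.rev, %y.rev
  ret <vscale x 4 x i1> %cmp
}

define <vscale x 4 x i1> @rev_cmp_after(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
  ; The compare now operates on the unreversed operands and a single
  ; reverse is applied to the i1 result instead.
  %cmp = icmp slt <vscale x 4 x i32> %x, %y
  %cmp.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %cmp)
  ret <vscale x 4 x i1> %cmp.rev
}

declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)

If the compare result feeds a further vector.reverse, as in the reverse_fcmp_reverse test above, the back-to-back reverses cancel and only the compare remains.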