[InstCombine] Bubble vector.reverse of compare operands to their result.

author Paul Walker <paul.walker@arm.com>

Mon, 17 Oct 2022 13:45:09 +0000 (14:45 +0100)

committer Paul Walker <paul.walker@arm.com>

Wed, 21 Dec 2022 15:53:14 +0000 (15:53 +0000)
author Paul Walker <paul.walker@arm.com>
Mon, 17 Oct 2022 13:45:09 +0000 (14:45 +0100)
committer Paul Walker <paul.walker@arm.com>
Wed, 21 Dec 2022 15:53:14 +0000 (15:53 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

index 4877a5e..0430c40 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
  #include "llvm/Analysis/CmpInstAnalysis.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -6091,6 +6092,31 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
    const CmpInst::Predicate Pred = Cmp.getPredicate();
    Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
    Value *V1, *V2;
+
+  auto createCmpReverse = [&](CmpInst::Predicate Pred, Value *X, Value *Y) {
+    Value *V = Builder.CreateCmp(Pred, X, Y, Cmp.getName());
+    if (auto *I = dyn_cast<Instruction>(V))
+      I->copyIRFlags(&Cmp);
+    Module *M = Cmp.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2)
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse()))
+      return createCmpReverse(Pred, V1, V2);
+
+    // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat)
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createCmpReverse(Pred, V1, RHS);
+  }
+  // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2)
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createCmpReverse(Pred, LHS, V2);
+
    ArrayRef<int> M;
    if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M))))
      return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll

index 11d4c02..cab6be3 100644 (file)
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -173,9 +173,8 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
  
  define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: @icmp_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
  ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
  ;
    %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -188,9 +187,9 @@ define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32>
  define <vscale x 4 x i1> @icmp_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: @icmp_reverse_1(
  ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
  ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
  ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
  ;
    %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -203,10 +202,10 @@ define <vscale x 4 x i1> @icmp_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i3
  ; %b.rev has multiple uses
  define <vscale x 4 x i1> @icmp_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: @icmp_reverse_2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
  ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
  ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
  ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
  ;
    %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -236,10 +235,10 @@ define <vscale x 4 x i1> @icmp_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x i3
  
  define <vscale x 4 x i1> @icmp_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
  ; CHECK-LABEL: @icmp_reverse_splat_RHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
  ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
  ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
  ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
  ;
    %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -269,10 +268,10 @@ define <vscale x 4 x i1> @icmp_reverse_splat_RHS_1(<vscale x 4 x i32> %a, i32 %b
  
  define <vscale x 4 x i1> @icmp_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
  ; CHECK-LABEL: @icmp_reverse_splat_LHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
  ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
  ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
  ; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP]]
  ;
    %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -656,15 +655,12 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float
  
  define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: @reverse_fcmp_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A_REV]], [[B_REV]]
-; CHECK-NEXT:    [[CMP_REV:%.*]] = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP]])
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP_REV]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[CMP1]]
  ;
    %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
    %b.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
-  %cmp = fcmp olt <vscale x 4 x float> %a.rev, %b.rev
+  %cmp = fcmp fast olt <vscale x 4 x float> %a.rev, %b.rev
    %cmp.rev = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %cmp)
    ret <vscale x 4 x i1> %cmp.rev
  }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

index 34e2f34..58c5410 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -10,7 +10,7 @@
  
  ; The test checks if the mask is being correctly created, reverted and used
  
-; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \
+; RUN: opt -passes=loop-vectorize,dce -mtriple aarch64-linux-gnu -S \
  ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
  
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -20,9 +20,11 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
  ; CHECK-LABEL: vector.body:
  ; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
  ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
-; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[WIDEMSKLOAD]]
+; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
+; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
+; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
  ; CHECK:  %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[FADD]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
+; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
  
  entry:
    %cmp7 = icmp sgt i64 %N, 0
author	Paul Walker <paul.walker@arm.com>
	Mon, 17 Oct 2022 13:45:09 +0000 (14:45 +0100)
committer	Paul Walker <paul.walker@arm.com>
	Wed, 21 Dec 2022 15:53:14 +0000 (15:53 +0000)
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp		patch \| blob \| history
llvm/test/Transforms/InstCombine/vector-reverse.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll		patch \| blob \| history