From 33439f982bd6650c8b42574ade840983962e28a9 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 12 Apr 2017 15:11:33 +0000
Subject: [PATCH] [InstCombine] morph an existing instruction instead of
 creating a new one

One potential way to make InstCombine (very slightly?) faster is to recycle instructions
when possible instead of creating new ones. It's not explicitly stated AFAIK, but we don't
consider this an "InstSimplify". We could, however, make a new layer to house transforms
like this if that makes InstCombine more manageable (just throwing out an idea; not sure
how much opportunity is actually here).

Differential Revision: https://reviews.llvm.org/D31863

llvm-svn: 300067
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 13 ++++++-------
 llvm/test/Transforms/InstCombine/select-cmp-br.ll       |  4 ++--
 llvm/test/Transforms/LoopVectorize/if-conversion.ll     |  4 ++--
 llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll   |  4 ++--
 4 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ea8c0a6..99a983a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2408,13 +2408,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     }
   }
 
-  if (Constant *RHS = dyn_cast<Constant>(Op1)) {
-    if (RHS->isAllOnesValue() && Op0->hasOneUse())
-      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
-      if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
-        return CmpInst::Create(CI->getOpcode(),
-                               CI->getInversePredicate(),
-                               CI->getOperand(0), CI->getOperand(1));
+  // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+  ICmpInst::Predicate Pred;
+  if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) &&
+      match(Op1, m_AllOnes())) {
+    cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
+    return replaceInstUsesWith(I, Op0);
   }
 
   if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
diff --git a/llvm/test/Transforms/InstCombine/select-cmp-br.ll b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
index e07bfd1..59384ab 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp-br.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
@@ -15,8 +15,8 @@ define void @test1(%C* %arg) {
 ; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
 ; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb:
@@ -115,8 +115,8 @@ define void @test3(%C* %arg) {
 ; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
 ; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb:
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
index acf7b12..d3a16e2 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
@@ -18,9 +18,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ;CHECK-LABEL: @function0(
 ;CHECK: load <4 x i32>
+;CHECK: icmp sle <4 x i32>
 ;CHECK: mul <4 x i32>
 ;CHECK: add <4 x i32>
-;CHECK: icmp sle <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
 define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
@@ -71,8 +71,8 @@ for.end:
 
 ;CHECK-LABEL: @reduction_func(
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
 ;CHECK: icmp slt <4 x i32>
+;CHECK: add <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
 define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index baec524..d5d0224 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -5,8 +5,8 @@
 define void @test_simple(i32* %p, i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_simple(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X2]], true
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -43,8 +43,8 @@ define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
 ; CHECK:         [[X3:%.*]] = icmp eq i32 [[C]], 0
-- 
2.7.4