[InstCombine] canonicalize insertelement of scalar constant ahead of insertelement...

author Sanjay Patel <spatel@rotateright.com>

Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

index 10f03c4..c29d327 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -645,6 +645,36 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
    return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
  }
  
+/// If we have an insertelement instruction feeding into another insertelement
+/// and the 2nd is inserting a constant into the vector, canonicalize that
+/// constant insertion before the insertion of a variable:
+///
+/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
+/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
+///
+/// This has the potential of eliminating the 2nd insertelement instruction
+/// via constant folding of the scalar constant into a vector constant.
+static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
+                                     InstCombiner::BuilderTy &Builder) {
+  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
+  if (!InsElt1 || !InsElt1->hasOneUse())
+    return nullptr;
+
+  Value *X, *Y;
+  Constant *ScalarC;
+  ConstantInt *IdxC1, *IdxC2;
+  if (match(InsElt1->getOperand(0), m_Value(X)) &&
+      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
+      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
+      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
+      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
+    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
+    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
+  }
+
+  return nullptr;
+}
+
  /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
  /// --> shufflevector X, CVec', Mask'
  static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
@@ -806,6 +836,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
      return Shuf;
  
+  if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder))
+    return NewInsElt;
+
    // Turn a sequence of inserts that broadcasts a scalar into a single
    // insert + shufflevector.
    if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
diff --git a/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll b/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll

index 1a91d11..a6b2ac8 100644 (file)
--- a/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -81,9 +81,8 @@ define <2 x float> @test5(float %A, float %B) {
  
  define <2 x float> @test6(float %A){
  ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 4.200000e+01, i32 1
-; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 4.200000e+01>, float %A, i32 0
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
  ;
    %tmp23 = bitcast float %A to i32
    %tmp24 = zext i32 %tmp23 to i64
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll

index 4507deb..29f774c 100644 (file)
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -86,11 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
  
  define <8 x i16> @pr26015(<4 x i16> %t0) {
  ; CHECK-LABEL: @pr26015(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> %t0, i32 2
-; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[TMP2]], i32 3
-; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
-; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
  ; CHECK-NEXT:    ret <8 x i16> [[T5]]
  ;
    %t1 = extractelement <4 x i16> %t0, i32 2
@@ -110,8 +107,7 @@ define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
  ; CHECK-NEXT:    br i1 %b, label %if, label %end
  ; CHECK:       if:
  ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[T1]], i32 3
-; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
  ; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
  ; CHECK-NEXT:    ret <8 x i16> [[T5]]
  ; CHECK:       end:
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll

index e29c9ee..5f27634 100644 (file)
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -182,10 +182,9 @@ define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
  
  define <2 x float> @test_fptrunc(double %f) {
  ; CHECK-LABEL: @test_fptrunc(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
-; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
  ;
    %tmp9 = insertelement <4 x double> undef, double %f, i32 0
    %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
@@ -198,10 +197,9 @@ define <2 x float> @test_fptrunc(double %f) {
  
  define <2 x double> @test_fpext(float %f) {
  ; CHECK-LABEL: @test_fpext(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
-; CHECK-NEXT:    ret <2 x double> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
  ;
    %tmp9 = insertelement <4 x float> undef, float %f, i32 0
    %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
@@ -223,8 +221,7 @@ define <4 x double> @test_shuffle(<4 x double> %f) {
  
  define <4 x float> @test_select(float %f, float %g) {
  ; CHECK-LABEL: @test_select(
-; CHECK-NEXT:    [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float %f, i32 0
  ; CHECK-NEXT:    [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ; CHECK-NEXT:    ret <4 x float> [[RET]]
  ;
diff --git a/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll

index 8db937a..c358509 100644 (file)
--- a/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
@@ -12,10 +12,12 @@ define <4 x float> @foo(<4 x float> %x) {
    ret <4 x float> %ins2
  }
  
+; Insert of a constant is canonicalized ahead of insert of a variable.
+
  define <4 x float> @bar(<4 x float> %x, float %a) {
  ; CHECK-LABEL: @bar(
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> %x, float %a, i32 1
-; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 1
  ; CHECK-NEXT:    ret <4 x float> [[INS2]]
  ;
    %ins1 = insertelement<4 x float> %x, float %a, i32 1
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 22 Mar 2017 17:10:44 +0000 (17:10 +0000)
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp		patch \| blob \| history
llvm/test/Transforms/InstCombine/bitcast-bigendian.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/vec_demanded_elts.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll		patch \| blob \| history