return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
}
+/// If we have an insertelement instruction feeding into another insertelement
+/// and the 2nd is inserting a constant into the vector, canonicalize that
+/// constant insertion before the insertion of a variable:
+///
+/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
+/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
+///
+/// This has the potential of eliminating the 2nd insertelement instruction
+/// via constant folding of the scalar constant into a vector constant.
+static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
+ InstCombiner::BuilderTy &Builder) {
+ auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
+ if (!InsElt1 || !InsElt1->hasOneUse())
+ return nullptr;
+
+ Value *X, *Y;
+ Constant *ScalarC;
+ ConstantInt *IdxC1, *IdxC2;
+ if (match(InsElt1->getOperand(0), m_Value(X)) &&
+ match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
+ match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
+ match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
+ match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
+ Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
+ return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
+ }
+
+ return nullptr;
+}
+
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
return Shuf;
+ if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder))
+ return NewInsElt;
+
// Turn a sequence of inserts that broadcasts a scalar into a single
// insert + shufflevector.
if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
define <2 x float> @test6(float %A){
; CHECK-LABEL: @test6(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 4.200000e+01, i32 1
-; CHECK-NEXT: ret <2 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 4.200000e+01>, float %A, i32 0
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
%tmp23 = bitcast float %A to i32
%tmp24 = zext i32 %tmp23 to i64
define <8 x i16> @pr26015(<4 x i16> %t0) {
; CHECK-LABEL: @pr26015(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> %t0, i32 2
-; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[TMP2]], i32 3
-; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
-; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
;
%t1 = extractelement <4 x i16> %t0, i32 2
; CHECK-NEXT: br i1 %b, label %if, label %end
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[T1]], i32 3
-; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
+; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
; CHECK: end:
define <2 x float> @test_fptrunc(double %f) {
; CHECK-LABEL: @test_fptrunc(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
-; CHECK-NEXT: ret <2 x float> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double %f, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
%tmp9 = insertelement <4 x double> undef, double %f, i32 0
%tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
define <2 x double> @test_fpext(float %f) {
; CHECK-LABEL: @test_fpext(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
-; CHECK-NEXT: ret <2 x double> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float %f, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%tmp9 = insertelement <4 x float> undef, float %f, i32 0
%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
define <4 x float> @test_select(float %f, float %g) {
; CHECK-LABEL: @test_select(
-; CHECK-NEXT: [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3
+; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float %f, i32 0
; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[RET]]
;
ret <4 x float> %ins2
}
+; Insert of a constant is canonicalized ahead of insert of a variable.
+
define <4 x float> @bar(<4 x float> %x, float %a) {
; CHECK-LABEL: @bar(
-; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> %x, float %a, i32 1
-; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 1
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float %a, i32 1