From: Sanjay Patel Date: Wed, 30 Nov 2022 19:10:02 +0000 (-0500) Subject: Revert "[InstCombine] canonicalize trunc + insert as bitcast + shuffle, part 1" X-Git-Tag: upstream/17.0.6~25831 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5eacdcff0653e977009a727193a7f91917075d3b;p=platform%2Fupstream%2Fllvm.git Revert "[InstCombine] canonicalize trunc + insert as bitcast + shuffle, part 1" This reverts commit a4c466766db77cd1fb42d7f98f32bb87a3d38829. This broke clang tests that are wrongly dependent on the optimizer. --- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 3320bf5..7d613f2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include #define DEBUG_TYPE "instcombine" @@ -1515,66 +1514,6 @@ static Instruction *narrowInsElt(InsertElementInst &InsElt, return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType()); } -/// Try to convert scalar extraction ops (shift+trunc) with insertelt to -/// bitcast and shuffle: -/// inselt V, (lshr (trunc X)), IndexC --> shuffle (bitcast X), V, Mask -static Instruction *foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian, - InstCombiner::BuilderTy &Builder) { - // inselt undef, (trunc T), IndexC - // TODO: Allow any base vector value. - // TODO: The one-use limitation could be removed for some cases (eg, no - // extra shuffle is needed and a shift is eliminated). - auto *VTy = dyn_cast(InsElt.getType()); - Value *T, *V = InsElt.getOperand(0); - uint64_t IndexC; - if (!VTy || !match(InsElt.getOperand(1), m_OneUse(m_Trunc(m_Value(T)))) || - !match(InsElt.getOperand(2), m_ConstantInt(IndexC)) || - !match(V, m_Undef())) - return nullptr; - - Type *SrcTy = T->getType(); - unsigned ScalarWidth = SrcTy->getScalarSizeInBits(); - unsigned VecEltWidth = VTy->getScalarSizeInBits(); - if (ScalarWidth % VecEltWidth != 0) - return nullptr; - - unsigned NumEltsInScalar = ScalarWidth / VecEltWidth; - Value *X = T; - if ((IsBigEndian && IndexC == NumEltsInScalar - 1) || - (!IsBigEndian && IndexC == 0)) { - // The insert is to the LSB end of the vector (depends on endian). - // That's all we need. - } else { - // TODO: Look through a shift-right and translate the insert index. - return nullptr; - } - - // Bitcast the scalar to a vector type with the destination element type. - Type *CastTy = FixedVectorType::get(VTy->getElementType(), NumEltsInScalar); - Value *VecX = Builder.CreateBitCast(X, CastTy, "vec." + X->getName()); - - unsigned NumElts = VTy->getNumElements(); - if (NumElts > NumEltsInScalar) { - // Pad the source vector with undef elements, so it matches the dest type. - SmallVector IdentityPaddedMask(NumElts, UndefMaskElem); - for (unsigned i = 0; i != NumEltsInScalar; ++i) - IdentityPaddedMask[i] = i; - VecX = Builder.CreateShuffleVector(VecX, IdentityPaddedMask); - } else if (NumElts < NumEltsInScalar) { - // Narrow the source vector, so it matches the dest type. - SmallVector IdentityExtractMask(NumElts); - std::iota(IdentityExtractMask.begin(), IdentityExtractMask.end(), 0); - VecX = Builder.CreateShuffleVector(VecX, IdentityExtractMask); - } - - // Insert the truncated element using a select-shuffle. All lanes but one are - // from the base vector V. - SmallVector SelectMask(NumElts); - std::iota(SelectMask.begin(), SelectMask.end(), 0); - SelectMask[IndexC] = (int)IndexC + NumElts; - return new ShuffleVectorInst(V, VecX, SelectMask); -} - Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -1702,9 +1641,6 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Ext = narrowInsElt(IE, Builder)) return Ext; - if (Instruction *Shuf = foldTruncInsElt(IE, DL.isBigEndian(), Builder)) - return Shuf; - return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/insert-trunc.ll b/llvm/test/Transforms/InstCombine/insert-trunc.ll index 20922f9..3ae128e 100644 --- a/llvm/test/Transforms/InstCombine/insert-trunc.ll +++ b/llvm/test/Transforms/InstCombine/insert-trunc.ll @@ -1,20 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE -; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE +; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL +; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL declare void @use(i8) declare void @use64(i64) define <4 x i16> @low_index_same_length_poison_basevec(i64 %x) { -; BE-LABEL: @low_index_same_length_poison_basevec( -; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0 -; BE-NEXT: ret <4 x i16> [[R]] -; -; LE-LABEL: @low_index_same_length_poison_basevec( -; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> -; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> -; LE-NEXT: ret <4 x i16> [[R]] +; ALL-LABEL: @low_index_same_length_poison_basevec( +; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0 +; ALL-NEXT: ret <4 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <4 x i16> poison, i16 %t, i64 0 @@ -22,15 +17,10 @@ define <4 x i16> @low_index_same_length_poison_basevec(i64 %x) { } define <4 x i16> @high_index_same_length_poison_basevec(i64 %x) { -; BE-LABEL: @high_index_same_length_poison_basevec( -; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> -; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> -; BE-NEXT: ret <4 x i16> [[R]] -; -; LE-LABEL: @high_index_same_length_poison_basevec( -; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; LE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3 -; LE-NEXT: ret <4 x i16> [[R]] +; ALL-LABEL: @high_index_same_length_poison_basevec( +; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3 +; ALL-NEXT: ret <4 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <4 x i16> poison, i16 %t, i64 3 @@ -49,15 +39,10 @@ define <4 x i16> @wrong_index_same_length_poison_basevec(i64 %x) { } define <8 x i16> @low_index_longer_length_poison_basevec(i64 %x) { -; BE-LABEL: @low_index_longer_length_poison_basevec( -; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; BE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0 -; BE-NEXT: ret <8 x i16> [[R]] -; -; LE-LABEL: @low_index_longer_length_poison_basevec( -; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> -; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> -; LE-NEXT: ret <8 x i16> [[R]] +; ALL-LABEL: @low_index_longer_length_poison_basevec( +; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0 +; ALL-NEXT: ret <8 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <8 x i16> poison, i16 %t, i64 0 @@ -65,15 +50,10 @@ define <8 x i16> @low_index_longer_length_poison_basevec(i64 %x) { } define <8 x i16> @high_index_longer_length_poison_basevec(i64 %x) { -; BE-LABEL: @high_index_longer_length_poison_basevec( -; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> -; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> -; BE-NEXT: ret <8 x i16> [[R]] -; -; LE-LABEL: @high_index_longer_length_poison_basevec( -; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3 -; LE-NEXT: ret <8 x i16> [[R]] +; ALL-LABEL: @high_index_longer_length_poison_basevec( +; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3 +; ALL-NEXT: ret <8 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <8 x i16> poison, i16 %t, i64 3 @@ -92,15 +72,10 @@ define <8 x i16> @wrong_index_longer_length_poison_basevec(i64 %x) { } define <2 x i16> @low_index_shorter_length_poison_basevec(i64 %x) { -; BE-LABEL: @low_index_shorter_length_poison_basevec( -; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; BE-NEXT: [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0 -; BE-NEXT: ret <2 x i16> [[R]] -; -; LE-LABEL: @low_index_shorter_length_poison_basevec( -; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> -; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <2 x i32> -; LE-NEXT: ret <2 x i16> [[R]] +; ALL-LABEL: @low_index_shorter_length_poison_basevec( +; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0 +; ALL-NEXT: ret <2 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <2 x i16> poison, i16 %t, i64 0 @@ -169,17 +144,11 @@ define <4 x i16> @lshr_same_length_poison_basevec_be(i64 %x) { } define <4 x i16> @lshr_same_length_poison_basevec_both_endian(i64 %x) { -; BE-LABEL: @lshr_same_length_poison_basevec_both_endian( -; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0 -; BE-NEXT: ret <4 x i16> [[R]] -; -; LE-LABEL: @lshr_same_length_poison_basevec_both_endian( -; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; LE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16> -; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <4 x i32> -; LE-NEXT: ret <4 x i16> [[R]] +; ALL-LABEL: @lshr_same_length_poison_basevec_both_endian( +; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 +; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0 +; ALL-NEXT: ret <4 x i16> [[R]] ; %s = lshr i64 %x, 48 %t = trunc i64 %s to i16 @@ -201,17 +170,11 @@ define <4 x i16> @lshr_wrong_index_same_length_poison_basevec(i64 %x) { } define <8 x i16> @lshr_longer_length_poison_basevec_le(i64 %x) { -; BE-LABEL: @lshr_longer_length_poison_basevec_le( -; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; BE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16> -; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <8 x i32> -; BE-NEXT: ret <8 x i16> [[R]] -; -; LE-LABEL: @lshr_longer_length_poison_basevec_le( -; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; LE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3 -; LE-NEXT: ret <8 x i16> [[R]] +; ALL-LABEL: @lshr_longer_length_poison_basevec_le( +; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 +; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3 +; ALL-NEXT: ret <8 x i16> [[R]] ; %s = lshr i64 %x, 48 %t = trunc i64 %s to i16 @@ -285,17 +248,11 @@ define <4 x i8> @lshr_wrong_index_shorter_length_poison_basevec(i64 %x) { } define <4 x i8> @lshr_wrong_shift_shorter_length_poison_basevec(i64 %x) { -; BE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec( -; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57 -; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8 -; BE-NEXT: [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0 -; BE-NEXT: ret <4 x i8> [[R]] -; -; LE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec( -; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57 -; LE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <8 x i8> -; LE-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[VEC_S]], <8 x i8> poison, <4 x i32> -; LE-NEXT: ret <4 x i8> [[R]] +; ALL-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec( +; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57 +; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8 +; ALL-NEXT: [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0 +; ALL-NEXT: ret <4 x i8> [[R]] ; %s = lshr i64 %x, 57 %t = trunc i64 %s to i8 diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll index 2fd7b4b..79c3d37 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll @@ -83,20 +83,21 @@ ret: define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-LABEL: @nocopy( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 -; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0 +; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1 ; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10 -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10 +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll index 76ba2eb..1bdc217 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll @@ -83,20 +83,21 @@ ret: define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-LABEL: @nocopy( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 -; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0 +; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1 ; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10 -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10 +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index 699d64a..99e5bee 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -155,21 +155,22 @@ end: define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v8i16_add1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_Y:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16> -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[VEC_Y]], <2 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll index 63b90ac..1ded4db 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll @@ -50,15 +50,15 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de ; SSE-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16 ; SSE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8 ; SSE-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8 -; SSE-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32 -; SSE-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1 -; SSE-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3 -; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float> +; SSE-NEXT: [[TMP1:%.*]] = lshr i64 [[V_VAL20]], 32 +; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[V_VAL20]], i64 0 +; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1 +; SSE-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32 +; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2 +; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3 +; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float> ; SSE-NEXT: ret <4 x float> [[VECINIT16]] ; ; AVX-LABEL: @ConvertVectors_ByVal( @@ -66,15 +66,15 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de ; AVX-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16 ; AVX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8 ; AVX-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8 -; AVX-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> -; AVX-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32 -; AVX-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1 -; AVX-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3 -; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float> +; AVX-NEXT: [[TMP1:%.*]] = trunc i64 [[V_VAL20]] to i32 +; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i64 0 +; AVX-NEXT: [[TMP3:%.*]] = lshr i64 [[V_VAL20]], 32 +; AVX-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i64 1 +; AVX-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2 +; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3 +; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float> ; AVX-NEXT: ret <4 x float> [[VECINIT16]] ; entry: