}
/// Check if two insertelement instructions are from the same buildvector.
-static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
- InsertElementInst *V) {
+static bool areTwoInsertFromSameBuildVector(
+ InsertElementInst *VU, InsertElementInst *V,
+ function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
// Instructions must be from the same basic blocks.
if (VU->getParent() != V->getParent())
return false;
getInsertIndex(IE1).value_or(Idx2) == Idx2)
IE1 = nullptr;
else
- IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
+ IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
}
if (IE2) {
if ((IE2 != V && !IE2->hasOneUse()) ||
getInsertIndex(IE2).value_or(Idx1) == Idx1)
IE2 = nullptr;
else
- IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
+ IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
}
} while (IE1 || IE2);
return false;
Optional<unsigned> InsertIdx = getInsertIndex(VU);
if (InsertIdx) {
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
- auto *It =
- find_if(FirstUsers,
- [VU](const std::pair<Value *, const TreeEntry *> &Pair) {
- return areTwoInsertFromSameBuildVector(
- VU, cast<InsertElementInst>(Pair.first));
- });
+ auto *It = find_if(
+ FirstUsers,
+ [this, VU](const std::pair<Value *, const TreeEntry *> &Pair) {
+ return areTwoInsertFromSameBuildVector(
+ VU, cast<InsertElementInst>(Pair.first),
+ [this](InsertElementInst *II) -> Value * {
+ Value *Op0 = II->getOperand(0);
+ if (getTreeEntry(II) && !getTreeEntry(Op0))
+ return nullptr;
+ return Op0;
+ });
+ });
int VecId = -1;
if (It == FirstUsers.end()) {
(void)ShuffleMasks.emplace_back();
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
// Checks if 2 insertelements are from the same buildvector.
InsertElementInst *VecInsert = Data.InsertElements.front();
- return areTwoInsertFromSameBuildVector(VU, VecInsert);
+ return areTwoInsertFromSameBuildVector(
+ VU, VecInsert,
+ [](InsertElementInst *II) { return II->getOperand(0); });
});
unsigned Idx = *InsertIdx;
if (It == ShuffledInserts.end()) {
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
+; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T27]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T47]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T15]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T40]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 6270, i32 poison, i32 poison, i32 poison>, i32 [[T40]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T9]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T48]], i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 6, i32 7>
+; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
+; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[T71]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 9>
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[TMP13]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
; CHECK-NEXT: ret void