/// \returns the insertion index of an InsertElement or InsertValue instruction,
/// using Offset as the base offset for the index.
-static Optional<unsigned> getInsertIndex(Value *InsertInst,
+static Optional<unsigned> getInsertIndex(const Value *InsertInst,
unsigned Offset = 0) {
int Index = Offset;
- if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
- if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+ if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
auto *VT = cast<FixedVectorType>(IE->getType());
if (CI->getValue().uge(VT->getNumElements()))
return None;
return None;
}
- auto *IV = cast<InsertValueInst>(InsertInst);
+ const auto *IV = cast<InsertValueInst>(InsertInst);
Type *CurrentType = IV->getType();
for (unsigned I : IV->indices()) {
- if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
Index *= ST->getNumElements();
CurrentType = ST->getElementType(I);
- } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ } else if (const auto *AT = dyn_cast<ArrayType>(CurrentType)) {
Index *= AT->getNumElements();
CurrentType = AT->getElementType();
} else {
return false;
auto *IE1 = VU;
auto *IE2 = V;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
if (IE2 == VU || IE1 == V)
return true;
if (IE1) {
- if (IE1 != VU && !IE1->hasOneUse())
+ if ((IE1 != VU && !IE1->hasOneUse()) ||
+ getInsertIndex(IE1).getValueOr(Idx2) == Idx2)
IE1 = nullptr;
else
IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
}
if (IE2) {
- if (IE2 != V && !IE2->hasOneUse())
+ if ((IE2 != V && !IE2->hasOneUse()) ||
+ getInsertIndex(IE2).getValueOr(Idx1) == Idx1)
IE2 = nullptr;
else
IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
const auto *I2 = IE2;
const InsertElementInst *PrevI1;
const InsertElementInst *PrevI2;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
do {
if (I2 == IE1)
return true;
return false;
PrevI1 = I1;
PrevI2 = I2;
- if (I1 && (I1 == IE1 || I1->hasOneUse()))
+ if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
+ getInsertIndex(I1).getValueOr(Idx2) != Idx2)
I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
- if (I2 && (I2 == IE2 || I2->hasOneUse()))
+ if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
+ getInsertIndex(I2).getValueOr(Idx1) != Idx1)
I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
} while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
llvm_unreachable("Two different buildvectors not expected.");
// Find the insertvector, vectorized in tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
- if (IEBase != EU.User && !IEBase->hasOneUse())
+ if (IEBase != EU.User &&
+ (!IEBase->hasOneUse() ||
+ getInsertIndex(IEBase).getValueOr(*InsertIdx) == *InsertIdx))
break;
// Build the mask for the vectorized insertelement instructions.
if (const TreeEntry *E = getTreeEntry(IEBase)) {
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-android23 < %s | FileCheck %s
+
+; Both insertelement instructions in the input (%11 and %13) write lane 0, and
+; %13 uses %11 as its vector operand, so the second insert overwrites the lane
+; written by the first. The CHECK lines expect the scalar loads and the fsub to
+; become <2 x float> operations, while both insertelement instructions stay in
+; place and are fed through extractelement.
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0
+; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
+; CHECK-NEXT: ret void
+;
+ %1 = getelementptr inbounds float, ptr undef, i32 2
+ %2 = load float, ptr %1, align 4
+ %3 = load float, ptr undef, align 4
+ %4 = fsub float %2, %3
+ %5 = getelementptr inbounds float, ptr undef, i32 3
+ %6 = load float, ptr %5, align 4
+ %7 = getelementptr inbounds float, ptr undef, i32 1
+ %8 = load float, ptr %7, align 4
+ %9 = fsub float %6, %8
+ %10 = fcmp olt float %9, %4
+ %11 = insertelement <2 x float> undef, float %3, i64 0
+ %12 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+ store <2 x float> zeroinitializer, ptr null, align 4
+ %13 = insertelement <2 x float> %11, float %6, i64 0
+ store <2 x float> zeroinitializer, ptr null, align 4
+ ret void
+}