if (CandidateTys.empty())
  return nullptr;
+// Generate new candidate vector types based on the sizes of the loads and
+// stores in this partition.
+for (const Slice &S : P) {
+  Type *Ty;
+  if (LoadInst *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
+    Ty = LI->getType();
+  else if (StoreInst *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
+    Ty = SI->getValueOperand()->getType();
+  else
+    continue;
+  if (isa<VectorType>(Ty))
+    continue;
+  // Create a vector type that matches the size of V, with elements of
+  // type Ty.
+  VectorType *V = CandidateTys[0];
+  uint64_t ElementSize = DL.getTypeStoreSizeInBits(Ty).getFixedSize();
+  uint64_t VectorSize = DL.getTypeSizeInBits(V).getFixedSize();
+  if (ElementSize != VectorSize && VectorSize % ElementSize == 0) {
+    VectorType *VTy = VectorType::get(Ty, VectorSize / ElementSize, false);
+    CandidateTys.push_back(VTy);
+    if (CommonEltTy != Ty)
+      HaveCommonEltTy = false;
+  }
+}
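For a concrete instance of the size check above: if the only existing candidate is a 128-bit <4 x i32> and one slice loads an i64, the loop adds <2 x i64> as a second candidate. Below is a minimal standalone sketch of just that arithmetic, with the bit widths hard-coded as stand-ins for the DL.getTypeSizeInBits / DL.getTypeStoreSizeInBits queries (the names are hypothetical, not LLVM API):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Stand-in sizes: a 128-bit candidate vector and an i64 load slice.
  uint64_t VectorSize = 128;
  uint64_t ElementSize = 64;
  // Same guard as the patch: the scalar must tile the vector exactly and
  // must not already cover the whole vector.
  if (ElementSize != VectorSize && VectorSize % ElementSize == 0)
    std::printf("new candidate: <%llu x i64>\n",
                static_cast<unsigned long long>(VectorSize / ElementSize));
  return 0;
}
```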
// Remove non-integer vector types if we had multiple common element types.
// FIXME: It'd be nice to replace them with integer vector types, but we can't
// do that until all the backends are known to produce good code for all
// integer vector types.
  return cast<FixedVectorType>(RHSTy)->getNumElements() <
         cast<FixedVectorType>(LHSTy)->getNumElements();
};
+auto RankVectorTypesEq = [&](VectorType *LHSTy, VectorType *RHSTy) {
+  return cast<FixedVectorType>(LHSTy)->getNumElements() ==
+         cast<FixedVectorType>(RHSTy)->getNumElements();
+};
llvm::sort(CandidateTys, RankVectorTypes);
-CandidateTys.erase(
-    std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
-    CandidateTys.end());
+CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
+                               RankVectorTypesEq),
+                   CandidateTys.end());
} else {
// The only way to have the same element type in every vector type is to
// have the same vector type. Check that and remove all but one.
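The new RankVectorTypesEq predicate is not just cosmetic: std::unique requires an equivalence relation, and the removed call handed it RankVectorTypes, a strict-weak ordering. With a less-than predicate, unique compares each element against the last one kept, so on a sorted range it discards the distinct neighbors and keeps the duplicates. A self-contained illustration with plain ints standing in for the candidate types (keyed by element count):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Element counts standing in for candidate vector types.
  std::vector<int> Candidates = {4, 2, 4, 8, 2};
  // Order with the sorting predicate...
  std::sort(Candidates.begin(), Candidates.end(),
            [](int L, int R) { return L < R; });
  // ...then deduplicate with a genuine equality predicate, as the
  // patch now does with RankVectorTypesEq.
  Candidates.erase(std::unique(Candidates.begin(), Candidates.end(),
                               [](int L, int R) { return L == R; }),
                   Candidates.end());
  for (int N : Candidates)
    std::printf("%d ", N); // prints: 2 4 8
  return 0;
}
```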
; heuristic for making a deterministic decision.
; CHECK-LABEL: @test11(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16>
-; CHECK-NEXT: [[A_SROA_0_4_VEC_EXPAND:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
-; CHECK-NEXT: [[A_SROA_0_4_VECBLEND:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> [[A_SROA_0_4_VEC_EXPAND]], <4 x i16> [[X:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[A_SROA_0_4_VECBLEND]] to <2 x float>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[A_SROA_0_4_VEC_INSERT]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
entry:
ret <4 x float> %vec
}
+
+define <2 x i64> @test13(i32 %a, i32 %b, i32 %c, i32 %d) {
+; Ensure that we can promote an alloca that is written with scalar i32
+; stores but read back as a <2 x i64> vector.
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2
+; CHECK-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[X_SROA_0_12_VEC_INSERT]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP0]]
+;
+entry:
+  %x = alloca [4 x i32]
+
+  store i32 %a, ptr %x
+  %x.tmp2 = getelementptr inbounds i32, ptr %x, i64 1
+  store i32 %b, ptr %x.tmp2
+  %x.tmp3 = getelementptr inbounds i32, ptr %x, i64 2
+  store i32 %c, ptr %x.tmp3
+  %x.tmp4 = getelementptr inbounds i32, ptr %x, i64 3
+  store i32 %d, ptr %x.tmp4
+
+  %result = load <2 x i64>, ptr %x
+
+  ret <2 x i64> %result
+}
+
+define i32 @test14(<2 x i64> %x) {
+; Ensure that we can promote an alloca that is written as a <2 x i64>
+; vector but read back with scalar i32 loads.
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT: [[X_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[X_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+; CHECK-NEXT: [[X_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+; CHECK-NEXT: [[X_SROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X_SROA_0_0_VEC_EXTRACT]], [[X_SROA_0_4_VEC_EXTRACT]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X_SROA_0_8_VEC_EXTRACT]], [[X_SROA_0_12_VEC_EXTRACT]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]]
+; CHECK-NEXT: ret i32 [[ADD2]]
+;
+entry:
+  %x.addr = alloca <2 x i64>, align 16
+  store <2 x i64> %x, ptr %x.addr, align 16
+
+  %a = load i32, ptr %x.addr
+  %x.tmp2 = getelementptr inbounds i32, ptr %x.addr, i64 1
+  %b = load i32, ptr %x.tmp2
+  %x.tmp3 = getelementptr inbounds i32, ptr %x.addr, i64 2
+  %c = load i32, ptr %x.tmp3
+  %x.tmp4 = getelementptr inbounds i32, ptr %x.addr, i64 3
+  %d = load i32, ptr %x.tmp4
+
+  %add = add i32 %a, %b
+  %add1 = add i32 %c, %d
+  %add2 = add i32 %add, %add1
+  ret i32 %add2
+}