From cacda256a15f37f6bc70de8a9494caa44708b764 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 17 Dec 2012 14:03:01 +0000 Subject: [PATCH] Fix a secondary bug I introduced while fixing the first part of PR14478. The first half of fixing this bug was actually in r170328, but was entirely coincidental. It did however get me to realize the nature of the bug, and adapt the test case to test more interesting behavior. In turn, that uncovered the rest of the bug which I've fixed here. This should fix two new asserts that showed up in the vectorize nightly tester. llvm-svn: 170333 --- llvm/lib/Transforms/Scalar/SROA.cpp | 8 ++----- llvm/test/Transforms/SROA/vector-promotion.ll | 34 +++++++++++++-------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index ef16f46..1ac239e 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2777,14 +2777,10 @@ private: Value *Splat = getIntegerSplat(IRB, II.getValue(), TD.getTypeSizeInBits(ElementTy)/8); - if (NumElements > 1) { + Splat = convertValue(TD, IRB, Splat, ElementTy); + if (NumElements > 1) Splat = getVectorSplat(IRB, Splat, NumElements); - Type *SplatVecTy = VectorType::get(ElementTy, NumElements); - if (Splat->getType() != SplatVecTy) - Splat = convertValue(TD, IRB, Splat, SplatVecTy); - } - Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index f957fef..846a432 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -281,37 +281,37 @@ entry: declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind -define <4 x i32> @test_subvec_memset() { +define <4 x float> @test_subvec_memset() { ; CHECK: @test_subvec_memset entry: - %a = alloca <4 x i32> + %a = alloca <4 x float> ; CHECK-NOT: alloca - %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0 - %a.cast0 = bitcast i32* %a.gep0 to i8* + %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0 + %a.cast0 = bitcast float* %a.gep0 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false) ; CHECK-NOT: store -; CHECK: %[[insert1:.*]] = shufflevector <4 x i32> , <4 x i32> undef, <4 x i32> +; CHECK: %[[insert1:.*]] = shufflevector <4 x float> , <4 x float> undef, <4 x i32> - %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1 - %a.cast1 = bitcast i32* %a.gep1 to i8* + %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1 + %a.cast1 = bitcast float* %a.gep1 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false) -; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> , <4 x i32> %[[insert1]], <4 x i32> +; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> , <4 x float> %[[insert1]], <4 x i32> - %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2 - %a.cast2 = bitcast i32* %a.gep2 to i8* + %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2 + %a.cast2 = bitcast float* %a.gep2 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false) -; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> , <4 x i32> %[[insert2]], <4 x i32> +; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> , <4 x float> %[[insert2]], <4 x i32> - %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3 - %a.cast3 = bitcast i32* %a.gep3 to i8* + %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3 + %a.cast3 = bitcast float* %a.gep3 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false) -; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 117901063, i32 3 +; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3 - %ret = load <4 x i32>* %a + %ret = load <4 x float>* %a - ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> %[[insert4]] + ret <4 x float> %ret +; CHECK-NEXT: ret <4 x float> %[[insert4]] } define i32 @PR14212() { -- 2.7.4