From 12eaef75ce8b77b28e17204f7d6202845f2d7599 Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Fri, 7 Jul 2017 16:15:05 +0000 Subject: [PATCH] [ARM] Implement interleaved access bug fix from r306334 r306334 fixed a bug in AArch64 dealing with wide interleaved accesses having pointer types. The bug also exists in ARM, so this patch copies over the fix. llvm-svn: 307409 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++- .../InterleavedAccess/ARM/interleaved-accesses.ll | 29 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d9319cb..4ff9f79 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 5938f9d..715c941 100644 --- a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -854,3 +854,32 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) { %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3> ret void } + +define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) { +; NEON-LABEL: @load_factor2_wide_pointer( +; NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* %ptr to i32* +; NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +; NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4) +; 
NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 +; NEON-NEXT: [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*> +; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 +; NEON-NEXT: [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*> +; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8 +; NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +; NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4) +; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 +; NEON-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*> +; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 +; NEON-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*> +; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: ret void +; NO_NEON-LABEL: @load_factor2_wide_pointer( +; NO_NEON-NOT: @llvm.arm.neon +; NO_NEON: ret void +; + %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4 + %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} -- 2.7.4