From e3ea97b04962334e15047b26fbbbc04c90c78946 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 5 Aug 2019 11:12:23 +0000 Subject: [PATCH] [AArch64] Skip isZIPMask check for masks with an odd number of elements. We process 2 elements at a time and expect the number of elements to be even. Similar to D60690. Reviewers: dmgreen, samparker, t.p.northover Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D65400 llvm-svn: 367831 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 ++ .../AArch64/arm64-neon-vector-shuffle-extract.ll | 26 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6a7fdd4..d8c12eb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll index 7ed0e59..2be8b01 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll @@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) { %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> ret <4 x i32> %s3 } + +define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) { +; CHECK-LABEL: zip_mask_check: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s +; CHECK-NEXT: str s0, [x2] +; CHECK-NEXT: ret + %tmp3 = load <3 x float>, <3 x float>* %p1, align 16 + %tmp4 = load <3 x float>, <3 x float>* %p2, align 4 + %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> + %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> + %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> + %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef) + %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8) + %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> + %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32> + %tmp12 = extractelement <16 x i32> %tmp11, i32 0 + store i32 %tmp12, i32* %p3, align 4 + ret void +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 -- 2.7.4