From 46a7f4d6f4bf2cc23a410e771adb587c5968047d Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Thu, 25 Jun 2020 14:46:16 +0100
Subject: [PATCH] [SVE][CodeGen] Fix bug in DAGCombiner::reduceBuildVecToShuffle

When trying to reduce a BUILD_VECTOR to a SHUFFLE_VECTOR it's
important that we carefully check the vector types that led to
that BUILD_VECTOR. In the test I have attached to this commit
there is a case where the results of two SVE faddv instructions
are being stored to consecutive memory locations. With my fix,
as part of merging those stores we discover that each
BUILD_VECTOR element came from an extract of an SVE vector
element and therefore bail out.

Differential Revision: https://reviews.llvm.org/D82564
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp   |  3 +++
 llvm/test/CodeGen/AArch64/sve-merging-stores.ll | 32 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-merging-stores.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4817c0e7..2568f66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18287,6 +18287,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
       return SDValue();
 
     SDValue ExtractedFromVec = Op.getOperand(0);
+    if (ExtractedFromVec.getValueType().isScalableVector())
+      return SDValue();
+
     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
       return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
new file mode 100644
index 0000000..66a526f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+%complex = type { { double, double } }
+
+; Function Attrs: argmemonly nounwind readonly
+declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double>, i32 immarg) #3
+
+; Function Attrs: argmemonly nounwind readonly
+declare <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1>, double*) #3
+
+; Function Attrs: nounwind readnone
+declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2
+
+define void @foo1(%complex* %outval, <vscale x 2 x i1> %pred, double *%inptr) {
+; CHECK-LABEL: foo1:
+; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1]
+; CHECK-NEXT: faddv d2, p0, z0.d
+; CHECK-NEXT: faddv d0, p0, z1.d
+; CHECK-NEXT: mov v2.d[1], v0.d[0]
+; CHECK-NEXT: str q2, [x0]
+  %realp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 0
+  %imagp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 1
+  %1 = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1> %pred, double* nonnull %inptr)
+  %2 = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %1, i32 0)
+  %3 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %2)
+  %4 = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %1, i32 1)
+  %5 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %4)
+  store double %3, double* %realp, align 8
+  store double %5, double* %imagp, align 8
+  ret void
+}
+
--
2.7.4
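
A note on why the new check is needed: the statement immediately after it
calls EVT::getVectorNumElements(), which is only meaningful for fixed-width
vectors; for a scalable type such as <vscale x 2 x double> there is no
compile-time element count to bound the extract index against. Below is a
minimal standalone sketch of the same guard pattern, for illustration only;
the helper name is hypothetical and not part of the patch:

#include "llvm/CodeGen/ValueTypes.h"

// Hypothetical helper mirroring the bail-out added by the patch: returns
// true if Idx is a provably in-range element index for a vector of type
// VecVT.
static bool isExtractIndexInRange(llvm::EVT VecVT, unsigned Idx) {
  // Scalable vectors have no fixed element count, so no index can be
  // proven in range at compile time; bail out.
  if (VecVT.isScalableVector())
    return false;
  // Fixed-width vectors have a known element count, so the bounds
  // check below is well-defined.
  return Idx < VecVT.getVectorNumElements();
}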