From 2a661f3f7376b80a9adbf0aaa94c6ecd5cf7b940 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 10 Mar 2014 09:34:07 +0000
Subject: [PATCH] AArch64: fix LowerCONCAT_VECTORS for new CodeGen.

The function was making too many assumptions about its input:

  1. The NEON_VDUP optimisation was far too aggressive, assuming (I think)
     that the input would always be a BUILD_VECTOR.

  2. We were treating most unknown concats as legal (by returning Op rather
     than SDValue()). I think only concats of pairs of vectors are actually
     legal.

http://llvm.org/PR19094

llvm-svn: 203450
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp      | 21 +++++++++++----------
 ...oncatvector-v8i8-bug.ll => concatvector-bugs.ll}  | 21 +++++++++++++++++++++
 2 files changed, 32 insertions(+), 10 deletions(-)
 rename llvm/test/CodeGen/AArch64/{concatvector-v8i8-bug.ll => concatvector-bugs.ll} (68%)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 447f500..cf2e46d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
   // same operand and of type v1* using the DUP instruction.
   unsigned NumOps = Op->getNumOperands();
-  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+  if (NumOps == 2) {
+    assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
     return Op;
+  }
+
+  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+    return SDValue();
 
   // Must be a single value for VDUP.
-  bool isConstant = true;
   SDValue Op0 = Op.getOperand(0);
   for (unsigned i = 1; i < NumOps; ++i) {
     SDValue OpN = Op.getOperand(i);
     if (Op0 != OpN)
-      return Op;
-
-    if (!isa<ConstantSDNode>(OpN->getOperand(0)))
-      isConstant = false;
+      return SDValue();
   }
 
   // Verify the value type.
@@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   default: llvm_unreachable("Unexpected number of operands");
   case 4:
     if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
-      return Op;
+      return SDValue();
     break;
   case 8:
     if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
-      return Op;
+      return SDValue();
     break;
   case 16:
     if (EltVT != MVT::v1i8)
-      return Op;
+      return SDValue();
     break;
   }
 
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
   // VDUP produces better code for constants.
-  if (isConstant)
+  if (Op0->getOpcode() == ISD::BUILD_VECTOR)
     return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
   return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
                      DAG.getConstant(0, MVT::i64));
diff --git a/llvm/test/CodeGen/AArch64/concatvector-v8i8-bug.ll b/llvm/test/CodeGen/AArch64/concatvector-bugs.ll
similarity index 68%
rename from llvm/test/CodeGen/AArch64/concatvector-v8i8-bug.ll
rename to llvm/test/CodeGen/AArch64/concatvector-bugs.ll
index f8854c3..5889e22 100644
--- a/llvm/test/CodeGen/AArch64/concatvector-v8i8-bug.ll
+++ b/llvm/test/CodeGen/AArch64/concatvector-bugs.ll
@@ -45,3 +45,24 @@ for.body130.us.us:                                ; preds = %for.body130.us.us,
   br label %for.body130.us.us
 }
 
+declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
+
+define <8 x i16> @test_splat(i32 %l) nounwind {
+; CHECK-LABEL: test_splat:
+; CHECK: ret
+  %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
+  %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
+  %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %vec
+}
+
+
+define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
+; CHECK-LABEL: test_notsplat:
+; CHECK: ret
+entry:
+  %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
+  %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
+  %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i16> %vec
+}
-- 
2.7.4
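
Note (illustration only, not part of the patch): the two-operand case that the
new early return treats as legal corresponds to IR along the following lines,
where a pair of 64-bit vectors is concatenated into one 128-bit result. The
function name is made up for this sketch; the selection DAG builder recognises
a full-width shuffle like this as a concatenation and builds a two-operand
ISD::CONCAT_VECTORS node, which, if it reaches this custom hook, has a 128-bit
result and is simply returned unchanged as legal.

  define <4 x i32> @concat_pair(<2 x i32> %a, <2 x i32> %b) {
    ; Mask <0, 1, 2, 3> selects all elements of %a then all of %b in order,
    ; i.e. a plain concatenation of the two 64-bit inputs into a v4i32.
    %c = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i32> %c
  }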