From 67953311e2e370a9fcf77595d66d39c505565382 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 7 Apr 2021 10:03:31 -0700
Subject: [PATCH] [SelectionDAG] Teach SelectionDAG::FoldConstantArithmetic to
 handle SPLAT_VECTOR

This allows FoldConstantArithmetic to handle SPLAT_VECTOR in addition
to BUILD_VECTOR, which in turn allows it to support scalable vectors.
I'm also allowing fixed length SPLAT_VECTOR, which is used by some
targets, but I'm not familiar enough with those targets to write tests
for them.

I had to block this function from running on CONCAT_VECTORS to avoid
calling getNode for a CONCAT_VECTORS of 2 scalars. This can happen
because the 2-operand getNode calls this function for any opcode.
Previously we were protected because a CONCAT_VECTORS of BUILD_VECTORs
is folded to a larger BUILD_VECTOR before that call. But it's not
always possible to fold a CONCAT_VECTORS of SPLAT_VECTORs, and we
don't even try.

This fixes PR49781, where DAG combine thought constant folding should
be possible but FoldConstantArithmetic couldn't do it.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D99682
---
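Notes:
    A minimal IR-level sketch of the new fold, adapted from the vadd
    tests below (the function name and RUN line here are illustrative,
    not files in the tree). Before this change, the add of two constant
    splats survived to instruction selection as a vector move plus a
    vector add; it is now constant folded to a single splat of 5:

        ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s | FileCheck %s
        define <vscale x 1 x i8> @splat_constant_add() {
          ; splat(2) and splat(3) become SPLAT_VECTOR nodes in the DAG;
          ; FoldConstantArithmetic now folds their add to splat(5),
          ; i.e. a single vmv.v.i on RISC-V.
          %ha = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
          %sa = shufflevector <vscale x 1 x i8> %ha, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
          %hb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
          %sb = shufflevector <vscale x 1 x i8> %hb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
          %vc = add <vscale x 1 x i8> %sa, %sb
          ret <vscale x 1 x i8> %vc
        }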
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp   | 83 +++++++++++++++++-------
 llvm/test/CodeGen/AArch64/pr49781.ll             | 13 ++++
 llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll  |  4 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll  | 15 +++++
 llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll | 10 +--
 llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll  | 15 +++++
 llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll  | 15 +++++
 7 files changed, 120 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pr49781.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cfe0340..89371aa5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5056,7 +5056,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
-  if (Opcode >= ISD::BUILTIN_OP_END)
+  // We can't create a scalar CONCAT_VECTORS so skip it. It will break
+  // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
+  // foldCONCAT_VECTORS in getNode before this is called.
+  if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
     return SDValue();
 
   // For now, the array Ops should only contain two values.
@@ -5096,27 +5099,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
       return FoldSymbolOffset(Opcode, VT, GA, N1);
 
-  // TODO: All the folds below are performed lane-by-lane and assume a fixed
-  // vector width, however we should be able to do constant folds involving
-  // splat vector nodes too.
-  if (VT.isScalableVector())
-    return SDValue();
-
   // For fixed width vectors, extract each constant element and fold them
   // individually. Either input may be an undef value.
-  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
-  if (!BV1 && !N1->isUndef())
+  bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
+                   N1->getOpcode() == ISD::SPLAT_VECTOR;
+  if (!IsBVOrSV1 && !N1->isUndef())
     return SDValue();
-  auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
-  if (!BV2 && !N2->isUndef())
+  bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
+                   N2->getOpcode() == ISD::SPLAT_VECTOR;
+  if (!IsBVOrSV2 && !N2->isUndef())
     return SDValue();
 
   // If both operands are undef, that's handled the same way as scalars.
-  if (!BV1 && !BV2)
+  if (!IsBVOrSV1 && !IsBVOrSV2)
     return SDValue();
 
-  assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
-         "Vector binop with different number of elements in operands?");
-
   EVT SVT = VT.getScalarType();
   EVT LegalSVT = SVT;
   if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
@@ -5124,19 +5120,46 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     if (LegalSVT.bitsLT(SVT))
       return SDValue();
   }
+
   SmallVector<SDValue, 4> Outputs;
-  unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+  unsigned NumOps = 0;
+  if (IsBVOrSV1)
+    NumOps = std::max(NumOps, N1->getNumOperands());
+  if (IsBVOrSV2)
+    NumOps = std::max(NumOps, N2->getNumOperands());
+  assert(NumOps != 0 && "Expected non-zero operands");
+  // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
+  // one iteration for that.
+  assert((!VT.isScalableVector() || NumOps == 1) &&
+         "Scalable vector should only have one scalar");
+
   for (unsigned I = 0; I != NumOps; ++I) {
-    SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
-    SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
+    // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
+    // to use operand 0 of the SPLAT_VECTOR for each fixed element.
+    SDValue V1;
+    if (N1->getOpcode() == ISD::BUILD_VECTOR)
+      V1 = N1->getOperand(I);
+    else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
+      V1 = N1->getOperand(0);
+    else
+      V1 = getUNDEF(SVT);
+
+    SDValue V2;
+    if (N2->getOpcode() == ISD::BUILD_VECTOR)
+      V2 = N2->getOperand(I);
+    else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
+      V2 = N2->getOperand(0);
+    else
+      V2 = getUNDEF(SVT);
+
     if (SVT.isInteger()) {
-      if (V1->getValueType(0).bitsGT(SVT))
+      if (V1.getValueType().bitsGT(SVT))
         V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
-      if (V2->getValueType(0).bitsGT(SVT))
+      if (V2.getValueType().bitsGT(SVT))
         V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
     }
 
-    if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+    if (V1.getValueType() != SVT || V2.getValueType() != SVT)
       return SDValue();
 
     // Fold one vector element.
@@ -5151,11 +5174,21 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     Outputs.push_back(ScalarResult);
   }
 
-  assert(VT.getVectorNumElements() == Outputs.size() &&
-         "Vector size mismatch!");
+  if (N1->getOpcode() == ISD::BUILD_VECTOR ||
+      N2->getOpcode() == ISD::BUILD_VECTOR) {
+    assert(VT.getVectorNumElements() == Outputs.size() &&
+           "Vector size mismatch!");
+
+    // Build a big vector out of the scalar elements we generated.
+    return getBuildVector(VT, SDLoc(), Outputs);
+  }
+
+  assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
+          N2->getOpcode() == ISD::SPLAT_VECTOR) &&
+         "One operand should be a splat vector");
 
-  // Build a big vector out of the scalar elements we generated.
-  return getBuildVector(VT, SDLoc(), Outputs);
+  assert(Outputs.size() == 1 && "Vector size mismatch!");
+  return getSplatVector(VT, SDLoc(), Outputs[0]);
 }
 
 // TODO: Merge with FoldConstantArithmetic
diff --git a/llvm/test/CodeGen/AArch64/pr49781.ll b/llvm/test/CodeGen/AArch64/pr49781.ll
new file mode 100644
index 0000000..066feda
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr49781.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+define <vscale x 2 x i64> @foo(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT:    ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %b = sub <vscale x 2 x i64> %a, %idx
+ %c = sub <vscale x 2 x i64> %b, %idx
+ ret <vscale x 2 x i64> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
index abd5cce..84f1190 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
@@ -38,13 +38,11 @@ define <vscale x 1 x i8> @vadd_vx_nxv1i8_1(<vscale x 1 x i8> %va) {
 }
 
 ; Test constant adds to see if we can optimize them away for scalable vectors.
-; FIXME: We can't.
 define <vscale x 1 x i8> @vadd_ii_nxv1i8_1() {
 ; CHECK-LABEL: vadd_ii_nxv1i8_1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, 2
-; CHECK-NEXT:    vadd.vi v8, v25, 3
+; CHECK-NEXT:    vmv.v.i v8, 5
 ; CHECK-NEXT:    ret
   %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
   %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
index a54103d..cc31c77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
@@ -37,6 +37,21 @@ define <vscale x 1 x i8> @vadd_vx_nxv1i8_1(<vscale x 1 x i8> %va) {
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant adds to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vadd_ii_nxv1i8_1() {
+; CHECK-LABEL: vadd_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, 5
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = add <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vadd_vx_nxv2i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
index 2798b67..fbbae63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
@@ -11,13 +11,9 @@ define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-NEXT:    vmul.vx v25, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 42
 ; CHECK-NEXT:    vadd.vx v25, v25, a0
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vrsub.vi v27, v26, 0
-; CHECK-NEXT:    vand.vi v27, v27, 7
-; CHECK-NEXT:    vsll.vv v27, v25, v27
-; CHECK-NEXT:    vand.vi v26, v26, 7
-; CHECK-NEXT:    vsrl.vv v25, v25, v26
-; CHECK-NEXT:    vor.vv v25, v25, v27
+; CHECK-NEXT:    vsll.vi v26, v25, 7
+; CHECK-NEXT:    vsrl.vi v25, v25, 1
+; CHECK-NEXT:    vor.vv v25, v25, v26
 ; CHECK-NEXT:    vmsleu.vx v0, v25, a0
 ; CHECK-NEXT:    ret
   %head_six = insertelement <vscale x 4 x i8> undef, i8 6, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
index e2317eb..fb5ddf0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
@@ -36,6 +36,21 @@ define <vscale x 1 x i8> @vsub_vx_nxv1i8_0(<vscale x 1 x i8> %va) {
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant subs to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vsub_ii_nxv1i8_1() {
+; CHECK-LABEL: vsub_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sub <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vsub_vv_nxv2i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
index 090bae1..db8702f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
@@ -36,6 +36,21 @@ define <vscale x 1 x i8> @vsub_vx_nxv1i8_0(<vscale x 1 x i8> %va) {
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant subs to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vsub_ii_nxv1i8_1() {
+; CHECK-LABEL: vsub_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sub <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vsub_vv_nxv2i8:
 ; CHECK:       # %bb.0:
-- 
2.7.4