From 52474992b1343d7c2876ed9d176cd963476af4e5 Mon Sep 17 00:00:00 2001
From: Petre-Ionut Tudor
Date: Mon, 20 Apr 2020 15:43:56 +0100
Subject: [PATCH] Revert "[ARM] Fix conditions for lowering to S[LR]I"

This reverts commit cabfcf840a9d15d92466c6774953d3aa399cde92.
The patch introduced another bug in the optimization.
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 55 ++++++++++---------------
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   | 10 +----
 llvm/lib/Target/AArch64/AArch64InstrInfo.td     | 14 ++-----
 llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll  |  6 +--
 4 files changed, 30 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index da96eca..cd19696 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -99,6 +99,11 @@ STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
 STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 
+static cl::opt<bool>
+EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
+                           cl::desc("Allow AArch64 SLI/SRI formation"),
+                           cl::init(false));
+
 // FIXME: The necessary dtprel relocations don't seem to be supported
 // well in the GNU bfd and gold linkers at the moment. Therefore, by
 // default, for now, fall back to GeneralDynamic code generation.
@@ -1318,8 +1323,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::VSHL:              return "AArch64ISD::VSHL";
   case AArch64ISD::VLSHR:             return "AArch64ISD::VLSHR";
   case AArch64ISD::VASHR:             return "AArch64ISD::VASHR";
-  case AArch64ISD::VSLI:              return "AArch64ISD::VSLI";
-  case AArch64ISD::VSRI:              return "AArch64ISD::VSRI";
   case AArch64ISD::CMEQ:              return "AArch64ISD::CMEQ";
   case AArch64ISD::CMGE:              return "AArch64ISD::CMGE";
   case AArch64ISD::CMGT:              return "AArch64ISD::CMGT";
@@ -3146,21 +3149,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
            "llvm.eh.recoverfp must take a function as the first argument");
     return IncomingFPOp;
   }
-
-  case Intrinsic::aarch64_neon_vsri:
-  case Intrinsic::aarch64_neon_vsli: {
-    EVT Ty = Op.getValueType();
-
-    if (!Ty.isVector())
-      report_fatal_error("Unexpected type for aarch64_neon_vsli");
-
-    assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
-
-    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
-    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
-    return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
-                       Op.getOperand(3));
-  }
   }
 }
 
@@ -7909,9 +7897,8 @@ static unsigned getIntrinsicID(const SDNode *N) {
 
 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTORs with constant element C1, C2 is a constant, and:
-//   - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2)
-//   - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2)
+// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
+// Also, logical shift right -> sri, with the same structure.
 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
 
@@ -7944,27 +7931,25 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   if (!isAllConstantBuildVector(And.getOperand(1), C1))
     return SDValue();
 
-  // Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or
-  // C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account
-  // how much one can shift elements of a particular size?
+  // Is C1 == ~C2, taking into account how much one can shift elements of a
+  // particular size?
   uint64_t C2 = C2node->getZExtValue();
   unsigned ElemSizeInBits = VT.getScalarSizeInBits();
   if (C2 > ElemSizeInBits)
     return SDValue();
   unsigned ElemMask = (1 << ElemSizeInBits) - 1;
-  if (IsShiftRight) {
-    if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask))
-      return SDValue();
-  } else {
-    if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask))
-      return SDValue();
-  }
+  if ((C1 & ElemMask) != (~C2 & ElemMask))
+    return SDValue();
 
   SDValue X = And.getOperand(0);
   SDValue Y = Shift.getOperand(0);
 
-  unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
-  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
+  unsigned Intrin =
+      IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
+  SDValue ResultSLI =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                  DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
+                  Shift.getOperand(1));
 
   LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
   LLVM_DEBUG(N->dump(&DAG));
@@ -7978,8 +7963,10 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
                                              SelectionDAG &DAG) const {
   // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
-  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
-    return Res;
+  if (EnableAArch64SlrGeneration) {
+    if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
+      return Res;
+  }
 
   EVT VT = Op.getValueType();
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 19b0188..23ee452 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -121,10 +121,6 @@ enum NodeType : unsigned {
   SRSHR_I,
   URSHR_I,
 
-  // Vector shift by constant and insert
-  VSLI,
-  VSRI,
-
   // Vector comparisons
   CMEQ,
   CMGE,
@@ -200,10 +196,8 @@ enum NodeType : unsigned {
   UMULL,
 
   // Reciprocal estimates and steps.
-  FRECPE,
-  FRECPS,
-  FRSQRTE,
-  FRSQRTS,
+  FRECPE, FRECPS,
+  FRSQRTE, FRSQRTS,
 
   SUNPKHI,
   SUNPKLO,
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a0b53ea..f634642 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -231,10 +231,6 @@ def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>, SDTCisInt<3>]>;
 def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
 
-def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
-                                                   SDTCisSameAs<0,1>,
-                                                   SDTCisSameAs<0,2>]>;
-
 def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
 def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
 def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
@@ -473,8 +469,6 @@ def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
 def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
 def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
 def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
-def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
-def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
 
 def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
 def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
@@ -5834,8 +5828,8 @@ defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
 defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
 defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                           BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
-defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
-def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
+def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                  (i32 vecshiftL64:$imm))),
           (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
 defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
@@ -5848,8 +5842,8 @@ defm SQSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                  int_aarch64_neon_sqshrn>;
 defm SQSHRUN  : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                  int_aarch64_neon_sqshrun>;
-defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
-def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
+def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                  (i32 vecshiftR64:$imm))),
           (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
 defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 204e857..b26542d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
 ; CHECK-LABEL: testLeftGood:
 ; CHECK: sli.16b v0, v1, #3
-  %and.i = and <16 x i8> %src1,
+  %and.i = and <16 x i8> %src1,
   %vshl_n = shl <16 x i8> %src2,
   %result = or <16 x i8> %and.i, %vshl_n
   store <16 x i8> %result, <16 x i8>* %dest, align 16
@@ -23,7 +23,7 @@ define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nou
 define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
 ; CHECK-LABEL: testRightGood:
 ; CHECK: sri.16b v0, v1, #3
-  %and.i = and <16 x i8> %src1,
+  %and.i = and <16 x i8> %src1,
   %vshl_n = lshr <16 x i8> %src2,
   %result = or <16 x i8> %and.i, %vshl_n
   store <16 x i8> %result, <16 x i8>* %dest, align 16
-- 
2.7.4
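
For context (not part of the patch itself): a minimal LLVM IR sketch of the (or (and X, C1), (shl Y, C2)) shape that the flag-gated tryLowerToSLI combine matches once llc is run with -aarch64-shift-insert-generation=true, mirroring the testLeftGood case above. The function name and operand names are hypothetical, and the splat constants are illustrative values derived from the reverted check: with C2 == 3 on i8 elements, the condition (C1 & ElemMask) == (~C2 & ElemMask) requires C1 == 252.

; Illustrative sketch only; names and constants are assumptions, not taken
; from the reverted test (its constants are truncated in the patch above).
define <16 x i8> @sli_pattern_sketch(<16 x i8> %x, <16 x i8> %y) {
  ; Keep bits of %x selected by the splat 252 mask (~3 & 0xff).
  %masked = and <16 x i8> %x, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
  ; Shift each byte of %y left by C2 == 3.
  %shifted = shl <16 x i8> %y, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; The or of the two is the pattern tryLowerToSLI inspects; testLeftGood's
  ; CHECK line expects this kind of input to select "sli.16b v0, v1, #3"
  ; when the aarch64-shift-insert-generation flag is enabled.
  %combined = or <16 x i8> %masked, %shifted
  ret <16 x i8> %combined
}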