From ba4926efde147744f6aec2100870bac4ee912cc4 Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Tue, 20 Sep 2016 19:02:06 +0000 Subject: [PATCH] Revert "[AArch64] Use the reciprocal estimation machinery" This reverts commit b7d42b0048f65346e9fa37fb65defeea7ce8c337 per request by Eric Christopher (v. http://bit.ly/2cmz6kW). llvm-svn: 282000 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 36 ----- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 9 -- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 ---- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 28 +--- llvm/lib/Target/AArch64/AArch64TargetMachine.h | 2 +- llvm/test/CodeGen/AArch64/recp-fastmath.ll | 79 ----------- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll | 160 ----------------------- 7 files changed, 3 insertions(+), 340 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/recp-fastmath.ll delete mode 100644 llvm/test/CodeGen/AArch64/sqrt-fastmath.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1fca582..a611549 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -953,8 +953,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; case AArch64ISD::SMULL: return "AArch64ISD::SMULL"; case AArch64ISD::UMULL: return "AArch64ISD::UMULL"; - case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE"; - case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE"; } return nullptr; } @@ -4585,40 +4583,6 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // AArch64 Optimization Hooks //===----------------------------------------------------------------------===// -/// getEstimate - Return the appropriate estimate DAG for either the reciprocal -/// or the reciprocal square root. -static SDValue getEstimate(const AArch64Subtarget &ST, - const AArch64TargetLowering::DAGCombinerInfo &DCI, unsigned Opcode, - const SDValue &Operand, unsigned &ExtraSteps) { - if (!ST.hasNEON()) - return SDValue(); - - EVT VT = Operand.getValueType(); - - std::string RecipOp; - RecipOp = Opcode == (AArch64ISD::FRECPE) ? "div": "sqrt"; - RecipOp = ((VT.isVector()) ? "vec-": "") + RecipOp; - RecipOp += (VT.getScalarType() == MVT::f64) ? "d": "f"; - - TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - ExtraSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(Opcode, SDLoc(Operand), VT, Operand); -} - -SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, unsigned &ExtraSteps) const { - return getEstimate(*Subtarget, DCI, AArch64ISD::FRECPE, Operand, ExtraSteps); -} - -SDValue AArch64TargetLowering::getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, unsigned &ExtraSteps, bool &UseOneConst) const { - UseOneConst = true; - return getEstimate(*Subtarget, DCI, AArch64ISD::FRSQRTE, Operand, ExtraSteps); -} - //===----------------------------------------------------------------------===// // AArch64 Inline Assembly Support //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2d75b9f..86f1d97 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -187,10 +187,6 @@ enum NodeType : unsigned { SMULL, UMULL, - // Reciprocal estimates. - FRECPE, - FRSQRTE, - // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, @@ -521,11 +517,6 @@ private: SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; - SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, - bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9c77b58..e58ad27 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -286,9 +286,6 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; -def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; -def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; - def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; @@ -3409,19 +3406,6 @@ def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), - (FRECPEv2f32 V64:$Rn)>; -def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), - (FRECPEv4f32 FPR128:$Rn)>; -def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), - (FRECPEv2f64 FPR128:$Rn)>; - def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), (FRECPXv1i32 FPR32:$Rn)>; def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), @@ -3434,19 +3418,6 @@ def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), - (FRSQRTEv2f32 V64:$Rn)>; -def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), - (FRSQRTEv4f32 FPR128:$Rn)>; -def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), - (FRSQRTEv2f64 FPR128:$Rn)>; - // If an integer is about to be converted to a floating point value, // just load it on the floating point unit. // Here are the patterns for 8 and 16-bits to float. diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 9faeb6f..52ccc78 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -167,29 +167,6 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) { return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; } -// Helper function to set up the defaults for reciprocals. -static void initReciprocals(AArch64TargetMachine& TM, AArch64Subtarget& ST) -{ - // For the estimates, convergence is quadratic, so essentially the number of - // digits is doubled after each iteration. ARMv8, the minimum architected - // accuracy of the initial estimate is 2^-8. Therefore, the number of extra - // steps to refine the result for float (23 mantissa bits) and for double - // (52 mantissa bits) are 2 and 3, respectively. - unsigned ExtraStepsF = 2, - ExtraStepsD = ExtraStepsF + 1; - bool UseRsqrt = ST.useRSqrt(); - - TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF); - TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD); - TM.Options.Reciprocals.setDefaults("vec-sqrtf", UseRsqrt, ExtraStepsF); - TM.Options.Reciprocals.setDefaults("vec-sqrtd", UseRsqrt, ExtraStepsD); - - TM.Options.Reciprocals.setDefaults("divf", false, ExtraStepsF); - TM.Options.Reciprocals.setDefaults("divd", false, ExtraStepsD); - TM.Options.Reciprocals.setDefaults("vec-divf", false, ExtraStepsF); - TM.Options.Reciprocals.setDefaults("vec-divd", false, ExtraStepsD); -} - static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { // AArch64 Darwin is always PIC. @@ -214,8 +191,7 @@ AArch64TargetMachine::AArch64TargetMachine( : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, OL), TLOF(createTLOF(getTargetTriple())), - Subtarget(TT, CPU, FS, *this, LittleEndian) { - initReciprocals(*this, Subtarget); + isLittle(LittleEndian) { initAsmInfo(); } @@ -263,7 +239,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, - Subtarget.isLittleEndian()); + isLittle); #ifndef LLVM_BUILD_GLOBAL_ISEL GISelAccessor *GISel = new GISelAccessor(); #else diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index b44107b..6fa5e83 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -46,7 +46,7 @@ public: } private: - AArch64Subtarget Subtarget; + bool isLittle; }; // AArch64 little endian target machine. diff --git a/llvm/test/CodeGen/AArch64/recp-fastmath.ll b/llvm/test/CodeGen/AArch64/recp-fastmath.ll deleted file mode 100644 index 710739b..0000000 --- a/llvm/test/CodeGen/AArch64/recp-fastmath.ll +++ /dev/null @@ -1,79 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!div,!vec-div | FileCheck %s --check-prefix=FAULT -; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=div,vec-div | FileCheck %s - -define float @frecp(float %x) #0 { - %div = fdiv fast float 1.0, %x - ret float %div - -; FAULT-LABEL: frecp: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fmov -; FAULT-NEXT: fdiv - -; CHECK-LABEL: frecp: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: frecpe -; CHECK-NEXT: fmov -} - -define <2 x float> @f2recp(<2 x float> %x) #0 { - %div = fdiv fast <2 x float> , %x - ret <2 x float> %div - -; FAULT-LABEL: f2recp: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fmov -; FAULT-NEXT: fdiv - -; CHECK-LABEL: f2recp: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frecpe -} - -define <4 x float> @f4recp(<4 x float> %x) #0 { - %div = fdiv fast <4 x float> , %x - ret <4 x float> %div - -; FAULT-LABEL: f4recp: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fmov -; FAULT-NEXT: fdiv - -; CHECK-LABEL: f4recp: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frecpe -} - -define double @drecp(double %x) #0 { - %div = fdiv fast double 1.0, %x - ret double %div - -; FAULT-LABEL: drecp: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fmov -; FAULT-NEXT: fdiv - -; CHECK-LABEL: drecp: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: frecpe -; CHECK-NEXT: fmov -} - -define <2 x double> @d2recp(<2 x double> %x) #0 { - %div = fdiv fast <2 x double> , %x - ret <2 x double> %div - -; FAULT-LABEL: d2recp: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fmov -; FAULT-NEXT: fdiv - -; CHECK-LABEL: d2recp: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frecpe -} - -attributes #0 = { nounwind "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll deleted file mode 100644 index 0d9533f..0000000 --- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ /dev/null @@ -1,160 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT -; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root | FileCheck %s --check-prefix=FAULT -; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s - -declare float @llvm.sqrt.f32(float) #1 -declare double @llvm.sqrt.f64(double) #1 -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1 -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1 -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1 - -define float @fsqrt(float %a) #0 { - %1 = tail call fast float @llvm.sqrt.f32(float %a) - ret float %1 - -; FAULT-LABEL: fsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: fsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define <2 x float> @f2sqrt(<2 x float> %a) #0 { - %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2 - ret <2 x float> %1 - -; FAULT-LABEL: f2sqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f2sqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: mov -; CHECK-NEXT: frsqrte -} - -define <4 x float> @f4sqrt(<4 x float> %a) #0 { - %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2 - ret <4 x float> %1 - -; FAULT-LABEL: f4sqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f4sqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: mov -; CHECK-NEXT: frsqrte -} - -define double @dsqrt(double %a) #0 { - %1 = tail call fast double @llvm.sqrt.f64(double %a) - ret double %1 - -; FAULT-LABEL: dsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: dsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define <2 x double> @d2sqrt(<2 x double> %a) #0 { - %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2 - ret <2 x double> %1 - -; FAULT-LABEL: d2sqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: d2sqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: mov -; CHECK-NEXT: frsqrte -} - -define float @frsqrt(float %a) #0 { - %1 = tail call fast float @llvm.sqrt.f32(float %a) - %2 = fdiv fast float 1.000000e+00, %1 - ret float %2 - -; FAULT-LABEL: frsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: frsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define <2 x float> @f2rsqrt(<2 x float> %a) #0 { - %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2 - %2 = fdiv fast <2 x float> , %1 - ret <2 x float> %2 - -; FAULT-LABEL: f2rsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f2rsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define <4 x float> @f4rsqrt(<4 x float> %a) #0 { - %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2 - %2 = fdiv fast <4 x float> , %1 - ret <4 x float> %2 - -; FAULT-LABEL: f4rsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f4rsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define double @drsqrt(double %a) #0 { - %1 = tail call fast double @llvm.sqrt.f64(double %a) - %2 = fdiv fast double 1.000000e+00, %1 - ret double %2 - -; FAULT-LABEL: drsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: drsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -define <2 x double> @d2rsqrt(<2 x double> %a) #0 { - %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2 - %2 = fdiv fast <2 x double> , %1 - ret <2 x double> %2 - -; FAULT-LABEL: d2rsqrt: -; FAULT-NEXT: BB#0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: d2rsqrt: -; CHECK-NEXT: BB#0 -; CHECK-NEXT: fmov -; CHECK-NEXT: frsqrte -} - -attributes #0 = { nounwind "unsafe-fp-math"="true" } -- 2.7.4