case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
- case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
+ case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
+ case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
+ case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
}
return nullptr;
}
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
- UseOneConst = true;
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
+ // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
+ // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
+ // Each step therefore computes E * FRSQRTS(X, E * E), i.e. M = X, N = E^2.
+ for (int i = ExtraSteps; i > 0; --i) {
+ SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
+ &Flags);
+ Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags);
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+ }
+
+ if (!Reciprocal) {
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
+
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags);
+ // Correct the result if the operand is 0.0: select the operand itself
+ // instead of the product X * E computed above.
+ Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
+ VT, Eq, Operand, Estimate);
+ }
+
+ ExtraSteps = 0;
return Estimate;
}
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
- DAG, ExtraSteps))
+ DAG, ExtraSteps)) {
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
+ // Newton reciprocal iteration: E * (2 - X * E)
+ // AArch64 reciprocal iteration instruction: (2 - M * N)
+ // Each step therefore computes E * FRECPS(X, E), i.e. M = X, N = E.
+ for (int i = ExtraSteps; i > 0; --i) {
+ SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
+ Estimate, &Flags);
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+ }
+
+ ExtraSteps = 0;
return Estimate;
+ }
return SDValue();
}
SMULL,
UMULL,
- // Reciprocal estimates.
- FRECPE,
- FRSQRTE,
+ // Reciprocal estimates and steps.
+ FRECPE, FRECPS,
+ FRSQRTE, FRSQRTS,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
+// Reciprocal refinement step node; the instruction it selects to computes
+// (2 - M * N) on its two operands.
+def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
+// Reciprocal square root refinement step node; the instruction it selects to
+// computes 0.5 * (3 - M * N) on its two operands.
+def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
(FRECPEv2f64 FPR128:$Rn)>;
+// Select the AArch64frecps node to the matching FRECPS result for every
+// type handled here: scalar f32/f64 and vectors v2f32, v4f32 and v2f64.
+// $Rn and $Rm are passed through in the same order as the node operands.
+def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
(FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
(FRSQRTEv2f64 FPR128:$Rn)>;
+// Select the AArch64frsqrts node to the matching FRSQRTS result for every
+// type handled here: scalar f32/f64 and vectors v2f32, v4f32 and v2f64.
+// $Rn and $Rm are passed through in the same order as the node operands.
+def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
; CHECK-LABEL: frecp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
-; CHECK-NEXT: fmov
+; CHECK-NEXT: frecpe [[R:s[0-7]]]
+; CHECK-NEXT: frecps {{s[0-7](, s[0-7])?}}, [[R]]
}
define <2 x float> @f2recp0(<2 x float> %x) #0 {
; CHECK-LABEL: f2recp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: frecpe [[R:v[0-7]\.2s]]
+; CHECK-NEXT: frecps {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[R]]
}
define <4 x float> @f4recp0(<4 x float> %x) #0 {
; CHECK-LABEL: f4recp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]]
+; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[R]]
}
define <8 x float> @f8recp0(<8 x float> %x) #0 {
; CHECK-LABEL: f8recp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
-; CHECK: frecpe
+; CHECK-NEXT: frecpe [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: frecpe [[RB:v[0-7]\.4s]]
+; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RA]]
+; CHECK: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
}
define double @drecp0(double %x) #0 {
; CHECK-LABEL: drecp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
-; CHECK-NEXT: fmov
+; CHECK-NEXT: frecpe [[R:d[0-7]]]
+; CHECK-NEXT: frecps {{d[0-7](, d[0-7])?}}, [[R]]
}
define <2 x double> @d2recp0(<2 x double> %x) #0 {
; CHECK-LABEL: d2recp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]]
+; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[R]]
}
define <4 x double> @d4recp0(<4 x double> %x) #0 {
; CHECK-LABEL: d4recp1:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
-; CHECK: frecpe
+; CHECK-NEXT: frecpe [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: frecpe [[RB:v[0-7]\.2d]]
+; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RA]]
+; CHECK: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
}
attributes #0 = { nounwind "unsafe-fp-math"="true" }
; CHECK-LABEL: fsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
+; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
+; CHECK: fcmp s0, #0
}
define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; CHECK-LABEL: f2sqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}
define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; CHECK-LABEL: f4sqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; CHECK-LABEL: f8sqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
-; CHECK: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
}
define double @dsqrt(double %a) #0 {
; CHECK-LABEL: dsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
+; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
+; CHECK: fcmp d0, #0
}
define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; CHECK-LABEL: d2sqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; CHECK-LABEL: d4sqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
-; CHECK: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
}
define float @frsqrt(float %a) #0 {
; CHECK-LABEL: frsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
+; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
+; CHECK-NOT: fcmp {{s[0-7]}}, #0
}
define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
; CHECK-LABEL: f2rsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}
define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
; CHECK-LABEL: f4rsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
; CHECK-LABEL: f8rsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-; CHECK: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
}
define double @drsqrt(double %a) #0 {
; CHECK-LABEL: drsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
+; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
+; CHECK-NOT: fcmp d0, #0
}
define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
; CHECK-LABEL: d2rsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
; CHECK-LABEL: d4rsqrt:
; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-; CHECK: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
}
attributes #0 = { nounwind "unsafe-fp-math"="true" }