From ce8d60156c2bd6754f760cd0186017ae6af6ee0a Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Tue, 18 Oct 2016 20:37:35 +0000 Subject: [PATCH] [AArch64] Avoid materializing 0.0 when generating FP SELECT Transform `a == 0.0 ? 0.0 : x` to `a == 0.0 ? a : x` and `a != 0.0 ? x : 0.0` to `a != 0.0 ? x : a` to avoid materializing 0.0 for FCSEL, since it does not have to be materialized beforehand for FCMP, as it has a form that has 0.0 as an implicit operand. Differential Revision: https://reviews.llvm.org/D24808 llvm-svn: 284531 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 19 ++++++ llvm/test/CodeGen/AArch64/fcsel-zero.ll | 82 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/fcsel-zero.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 17cf8e0..8ef6d55 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4083,6 +4083,25 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, // clean. Some of them require two CSELs to implement. AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); + + if (DAG.getTarget().Options.UnsafeFPMath) { + // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and + // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0. + ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS); + if (RHSVal && RHSVal->isZero()) { + ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal); + ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal); + + if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) && + CTVal && CTVal->isZero()) + TVal = LHS; + else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) && + CFVal && CFVal->isZero()) + FVal = LHS; + } + } + + // Emit first, and possibly only, CSEL. 
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll new file mode 100644 index 0000000..43e07bf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll @@ -0,0 +1,82 @@ +; Check that 0.0 is not materialized for CSEL when comparing against it. + +; RUN: llc -mtriple=aarch64 -o - < %s | FileCheck %s + +define float @foeq(float %a, float %b) #0 { + %t = fcmp oeq float %a, 0.0 + %v = select i1 %t, float 0.0, float %b + ret float %v +; CHECK-LABEL: foeq +; CHECK: fcmp [[R:s[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq +} + +define float @fueq(float %a, float %b) #0 { + %t = fcmp ueq float %a, 0.0 + %v = select i1 %t, float 0.0, float %b + ret float %v +; CHECK-LABEL: fueq +; CHECK: fcmp [[R:s[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq +; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs +} + +define float @fone(float %a, float %b) #0 { + %t = fcmp one float %a, 0.0 + %v = select i1 %t, float %b, float 0.0 + ret float %v +; CHECK-LABEL: fone +; CHECK: fcmp [[R:s[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], mi +; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt +} + +define float @fune(float %a, float %b) #0 { + %t = fcmp une float %a, 0.0 + %v = select i1 %t, float %b, float 0.0 + ret float %v +; CHECK-LABEL: fune +; CHECK: fcmp [[R:s[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne +} + +define double @doeq(double %a, double %b) #0 { + %t = fcmp oeq double %a, 0.0 + %v = select i1 %t, double 0.0, double %b + ret double %v +; CHECK-LABEL: doeq +; CHECK: fcmp [[R:d[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq +} + +define double @dueq(double %a, double %b) #0 { + %t = fcmp ueq double %a, 0.0 + %v = select i1 %t, double 0.0, double %b + ret double %v +; CHECK-LABEL: 
dueq +; CHECK: fcmp [[R:d[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq +; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs +} + +define double @done(double %a, double %b) #0 { + %t = fcmp one double %a, 0.0 + %v = select i1 %t, double %b, double 0.0 + ret double %v +; CHECK-LABEL: done +; CHECK: fcmp [[R:d[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], mi +; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt +} + +define double @dune(double %a, double %b) #0 { + %t = fcmp une double %a, 0.0 + %v = select i1 %t, double %b, double 0.0 + ret double %v +; CHECK-LABEL: dune +; CHECK: fcmp [[R:d[0-9]+]], #0.0 +; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne +} + +attributes #0 = { nounwind "unsafe-fp-math"="true" } + -- 2.7.4