From 7328ff75bab0d559645927ddaeab39bf0f4b8d03 Mon Sep 17 00:00:00 2001 From: gonglingqin Date: Tue, 20 Sep 2022 19:05:08 +0800 Subject: [PATCH] [LoongArch] Add codegen support for fmaxnum_ieee and fminnum_ieee Thanks for @xry111's previous bug fixes. See https://github.com/loongson/llvm-project/pull/1 for more details. Differential Revision: https://reviews.llvm.org/D133478 --- .../Target/LoongArch/LoongArchFloat32InstrInfo.td | 4 + .../Target/LoongArch/LoongArchFloat64InstrInfo.td | 4 + .../lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 + llvm/test/CodeGen/LoongArch/fp-max-min.ll | 154 +++++++++++++++++++++ 4 files changed, 166 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/fp-max-min.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 1ef56d3..d9993e0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -143,12 +143,16 @@ def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; def : PatFpr; def : PatFpr; def : PatFpr; def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>; +def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>; + /// Setcc // Match non-signaling comparison diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 145da14..42a0251 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -149,6 +149,8 @@ def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; def : PatFpr; def : PatFpr; def : PatFpr; @@ -160,6 +162,8 @@ def : Pat<(fcopysign FPR64:$fj, FPR32:$fk), def : Pat<(fcopysign FPR32:$fj, FPR64:$fk), (FCOPYSIGN_S FPR32:$fj, (FCVT_S_D FPR64:$fk))>; +def : Pat<(fcanonicalize FPR64:$fj), (FMAX_D $fj, $fj)>; + /// Setcc // Match non-signaling comparison diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index df55dfd..e229951 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -112,6 +112,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); } if (Subtarget.hasBasicD()) { setCondCodeAction(FPCCToExpand, MVT::f64, Expand); @@ -120,6 +122,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); } // Effectively disable jump table generation. diff --git a/llvm/test/CodeGen/LoongArch/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/fp-max-min.ll new file mode 100644 index 0000000..b2ca475 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-max-min.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.maxnum.f64(double, double) +declare float @llvm.minnum.f32(float, float) +declare double @llvm.minnum.f64(double, double) + +define float @maxnum_float(float %x, float %y) { +; LA32F-LABEL: maxnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maxnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maxnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maxnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.maxnum.f32(float %x, float %y) + ret float %z +} + +define double @maxnum_double(double %x, double %y) { +; LA32F-LABEL: maxnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmax) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maxnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maxnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maxnum_double: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.maxnum.f64(double %x, double %y) + ret double %z +} + +define float @minnum_float(float %x, float %y) { +; LA32F-LABEL: minnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.minnum.f32(float %x, float %y) + ret float %z +} + +define double @minnum_double(double %x, double %y) { +; LA32F-LABEL: minnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmin) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minnum_double: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.minnum.f64(double %x, double %y) + ret double %z +} -- 2.7.4