From d7be3eab5c0e1598e919973ed68a200997a4734a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Lu=C3=ADs=20Marques?= <luismarques@lowrisc.org>
Date: Tue, 26 Nov 2019 14:24:59 +0000
Subject: [PATCH] [RISCV] Handle fcopysign(f32, f64) and fcopysign(f64, f32)

Summary: Adds tablegen patterns to explicitly handle fcopysign where the
magnitude and sign arguments have different types, due to the sign value casts
being removed by the DAGCombiner. Support for RV32IF follows in a separate
commit. Adds tests for all relevant scenarios except RV32IF.

Reviewers: lenary
Reviewed By: lenary
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70678
---
 llvm/lib/Target/RISCV/RISCVInstrInfoD.td  |  3 +
 llvm/test/CodeGen/RISCV/copysign-casts.ll | 92 +++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/copysign-casts.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index fe38c4f..b5343e8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -231,6 +231,9 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
 
 def : PatFpr64Fpr64<fcopysign, FSGNJ_D>;
 def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
+def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>;
+def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
+                                                              0b111))>;
 
 // fmadd: rs1 * rs2 + rs3
 def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
new file mode 100644
index 0000000..45faa23
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64IFD
+
+; Test fcopysign scenarios where the sign argument is cast to the type of the
+; magnitude argument. Those casts can be folded away by the DAGCombiner.
+
+declare double @llvm.copysign.f64(double, double)
+declare float @llvm.copysign.f32(float, float)
+
+define double @fold_promote(double %a, float %b) nounwind {
+; RV32I-LABEL: fold_promote:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a3, 524288
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    addi a3, a3, -1
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fold_promote:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a2, zero, -1
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    addi a2, zero, 1
+; RV64I-NEXT:    slli a2, a2, 31
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32IFD-LABEL: fold_promote:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    fcvt.d.s ft0, fa1
+; RV32IFD-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fold_promote:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fcvt.d.s ft0, fa1
+; RV64IFD-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV64IFD-NEXT:    ret
+  %c = fpext float %b to double
+  %t = call double @llvm.copysign.f64(double %a, double %c)
+  ret double %t
+}
+
+define float @fold_demote(float %a, double %b) nounwind {
+; RV32I-LABEL: fold_demote:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    and a2, a2, a1
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fold_demote:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    addi a2, zero, -1
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32IFD-LABEL: fold_demote:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    fcvt.s.d ft0, fa1
+; RV32IFD-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fold_demote:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fcvt.s.d ft0, fa1
+; RV64IFD-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV64IFD-NEXT:    ret
+  %c = fptrunc double %b to float
+  %t = call float @llvm.copysign.f32(float %a, float %c)
+  ret float %t
+}
-- 
2.7.4