From 9d6d4b07a21614be12edc063aeda2bd19b58f780 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Sun, 11 Oct 2020 17:33:47 +0900
Subject: [PATCH] [VE] Support fneg and frem

VE doesn't have fneg or frem instructions, so mark them as Expand.  Also
add regression tests.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D89205
---
 llvm/lib/Target/VE/VEISelLowering.cpp |   8 ++
 llvm/test/CodeGen/VE/fp_fneg.ll       |  67 ++++++++++
 llvm/test/CodeGen/VE/fp_frem.ll       | 171 ++++++++++++++++++++++++++
 3 files changed, 246 insertions(+)
 create mode 100644 llvm/test/CodeGen/VE/fp_fneg.ll
 create mode 100644 llvm/test/CodeGen/VE/fp_frem.ll
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index e0172a7b7a6d..314dd9ed6c5e 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -740,6 +740,14 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
   /// } Conversion
 
   /// Floating-point Ops {
+  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
+  ///       and fcmp.
+
+  // VE doesn't have the following floating-point operations.
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setOperationAction(ISD::FNEG, VT, Expand);
+    setOperationAction(ISD::FREM, VT, Expand);
+  }
 
   // VE doesn't have fdiv of f128.
   setOperationAction(ISD::FDIV, MVT::f128, Expand);
diff --git a/llvm/test/CodeGen/VE/fp_fneg.ll b/llvm/test/CodeGen/VE/fp_fneg.ll
new file mode 100644
index 000000000000..8ebec6b9500f
--- /dev/null
+++ b/llvm/test/CodeGen/VE/fp_fneg.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test ‘fneg’ Instruction
+;;;
+;;; Syntax:
+;;;   <result> = fneg [fast-math flags]* <ty> <op1>   ; yields ty:result
+;;;
+;;; Overview:
+;;;    The ‘fneg’ instruction returns the negation of its operand.
+;;;
+;;; Arguments:
+;;;   The argument to the ‘fneg’ instruction must be a floating-point or
+;;;   vector of floating-point values.
+;;;
+;;; Semantics:
+;;;
+;;;   The value produced is a copy of the operand with its sign bit flipped.
+;;;   This instruction can also take any number of fast-math flags, which are
+;;;   optimization hints to enable otherwise unsafe floating-point
+;;;   optimizations.
+;;;
+;;; Example:
+;;;   <result> = fneg float %val          ; yields float:result = -%val
+;;;
+;;; Note:
+;;;   We test only float/double/fp128.
+
+; Function Attrs: norecurse nounwind readnone
+define float @fneg_float(float %0) {
+; CHECK-LABEL: fneg_float:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    sra.l %s0, %s0, 32
+; CHECK-NEXT:    lea %s1, -2147483648
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    xor %s0, %s0, %s1
+; CHECK-NEXT:    sll %s0, %s0, 32
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = fneg float %0
+  ret float %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define double @fneg_double(double %0) {
+; CHECK-LABEL: fneg_double:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, %s0, (1)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = fneg double %0
+  ret double %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @fneg_quad(fp128 %0) {
+; CHECK-LABEL: fneg_quad:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 176(, %s11)
+; CHECK-NEXT:    st %s0, 184(, %s11)
+; CHECK-NEXT:    ld1b.zx %s0, 191(, %s11)
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    xor %s0, %s0, %s1
+; CHECK-NEXT:    st1b %s0, 191(, %s11)
+; CHECK-NEXT:    ld %s1, 176(, %s11)
+; CHECK-NEXT:    ld %s0, 184(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = fneg fp128 %0
+  ret fp128 %2
+}
diff --git a/llvm/test/CodeGen/VE/fp_frem.ll b/llvm/test/CodeGen/VE/fp_frem.ll
new file mode 100644
index 000000000000..3906c67dc333
--- /dev/null
+++ b/llvm/test/CodeGen/VE/fp_frem.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test ‘frem’ Instruction
+;;;
+;;; Syntax:
+;;;   <result> = frem [fast-math flags]* <ty> <op1>, <op2> ; yields ty:result
+;;;
+;;; Overview:
+;;;   The ‘frem’ instruction returns the remainder from the division of its two
+;;;   operands.
+;;;
+;;; Arguments:
+;;;   The two arguments to the ‘frem’ instruction must be floating-point or
+;;;   vector of floating-point values. Both arguments must have identical types.
+;;;
+;;; Semantics:
+;;;   The value produced is the floating-point remainder of the two operands.
+;;;   This is the same output as a libm ‘fmod’ function, but without any
+;;;   possibility of setting errno. The remainder has the same sign as the
+;;;   dividend. This instruction is assumed to execute in the default
+;;;   floating-point environment. This instruction can also take any number
+;;;   of fast-math flags, which are optimization hints to enable otherwise
+;;;   unsafe floating-point optimizations:
+;;;
+;;; Example:
+;;;
+;;;   <result> = frem float 4.0, %var ; yields float:result = 4.0 % %var
+;;;
+;;; Note:
+;;;   We test only float/double/fp128.
+;;;   We have no way to generate frem from C source code, so convert fdiv
+;;;   to frem by using the sed program.
+
+; Function Attrs: norecurse nounwind readnone
+define float @frem_float_var(float %0, float %1) {
+; CHECK-LABEL: frem_float_var:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s2, fmodf@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = frem float %0, %1
+  ret float %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define double @frem_double_var(double %0, double %1) {
+; CHECK-LABEL: frem_double_var:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s2, fmod@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmod@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = frem double %0, %1
+  ret double %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @frem_quad_var(fp128 %0, fp128 %1) {
+; CHECK-LABEL: frem_quad_var:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s4, fmodl@lo
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodl@hi(, %s4)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = frem fp128 %0, %1
+  ret fp128 %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define float @frem_float_zero(float %0) {
+; CHECK-LABEL: frem_float_zero:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea.sl %s0, 0
+; CHECK-NEXT:    lea %s2, fmodf@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem float 0.000000e+00, %0
+  ret float %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define double @frem_double_zero(double %0) {
+; CHECK-LABEL: frem_double_zero:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, fmod@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmod@hi(, %s0)
+; CHECK-NEXT:    lea.sl %s0, 0
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem double 0.000000e+00, %0
+  ret double %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @frem_quad_zero(fp128 %0) {
+; CHECK-LABEL: frem_quad_zero:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    or %s3, 0, %s1
+; CHECK-NEXT:    lea %s0, .LCPI{{[0-9]+}}_0@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s4, .LCPI{{[0-9]+}}_0@hi(, %s0)
+; CHECK-NEXT:    ld %s0, 8(, %s4)
+; CHECK-NEXT:    ld %s1, (, %s4)
+; CHECK-NEXT:    lea %s4, fmodl@lo
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodl@hi(, %s4)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem fp128 0xL00000000000000000000000000000000, %0
+  ret fp128 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define float @frem_float_cont(float %0) {
+; CHECK-LABEL: frem_float_cont:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea.sl %s0, -1073741824
+; CHECK-NEXT:    lea %s2, fmodf@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem float -2.000000e+00, %0
+  ret float %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define double @frem_double_cont(double %0) {
+; CHECK-LABEL: frem_double_cont:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, fmod@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmod@hi(, %s0)
+; CHECK-NEXT:    lea.sl %s0, -1073741824
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem double -2.000000e+00, %0
+  ret double %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @frem_quad_cont(fp128 %0) {
+; CHECK-LABEL: frem_quad_cont:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    or %s3, 0, %s1
+; CHECK-NEXT:    lea %s0, .LCPI{{[0-9]+}}_0@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s4, .LCPI{{[0-9]+}}_0@hi(, %s0)
+; CHECK-NEXT:    ld %s0, 8(, %s4)
+; CHECK-NEXT:    ld %s1, (, %s4)
+; CHECK-NEXT:    lea %s4, fmodl@lo
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodl@hi(, %s4)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = frem fp128 0xL0000000000000000C000000000000000, %0
+  ret fp128 %2
+}
-- 
2.34.1