From 8ffe8891cd57694dbed148cfcae8aaede70a0f30 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Tue, 12 May 2020 14:29:40 +0800
Subject: [PATCH] [PowerPC] Exploit VSX neg, abs and nabs for f32

xsnegdp, xsabsdp and xsnabsdp can be used to operate on f32 operands.

This patch adds the missing patterns since we prefer VSX instructions
when available.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D75344
---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td       | 18 ++++++++++++++++++
 llvm/test/CodeGen/PowerPC/float-logic-ops.ll |  6 +++---
 llvm/test/CodeGen/PowerPC/fma.ll             |  6 ++++++
 llvm/test/CodeGen/PowerPC/fmf-propagation.ll |  8 ++++----
 llvm/test/CodeGen/PowerPC/fsub-fneg.ll       |  6 +++---
 5 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 419f3fc..93fbdd6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2602,6 +2602,16 @@ def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMINDP $src1, $src2))>;
 
+// f32 abs: apply XSABSDP to $S in VSFRC, then copy the result back to VSSRC.
+def : Pat<(f32 (fabs f32:$S)),
+          (f32 (COPY_TO_REGCLASS (XSABSDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 nabs: match fneg(fabs $S) as one XSNABSDP (VSFRC in, VSSRC out).
+def : Pat<(f32 (fneg (fabs f32:$S))),
+          (f32 (COPY_TO_REGCLASS (XSNABSDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
 // f32 Min.
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
@@ -2999,6 +3009,14 @@ def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
 def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
           (XSNMSUBASP $C, $A, $B)>;
 
+// f32 neg
+// Although XSNEGDP is available on P7, we only select it from P8 onwards so
+// that FNMSUBS can still be selected for the fneg-fmsub pattern on P7 (the
+// VSX version, XSNMSUBASP, has only been available since P8).
+def : Pat<(f32 (fneg f32:$S)),
+          (f32 (COPY_TO_REGCLASS (XSNEGDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
 // Instructions for converting float to i32 feeding a store.
 def : Pat<(PPCstore_scal_int_from_vsr
             (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
diff --git a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
index 938ee5d..3072263 100644
--- a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
@@ -5,7 +5,7 @@
 define float @absf(float %a) {
 ; CHECK-LABEL: absf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fabs f1, f1
+; CHECK-NEXT:    xsabsdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
   %conv = bitcast float %a to i32
@@ -80,7 +80,7 @@ entry:
 define float @negf(float %a) {
 ; CHECK-LABEL: negf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fneg f1, f1
+; CHECK-NEXT:    xsnegdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
   %conv = bitcast float %a to i32
@@ -127,7 +127,7 @@ entry:
 define float @nabsf(float %a) {
 ; CHECK-LABEL: nabsf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fnabs f1, f1
+; CHECK-NEXT:    xsnabsdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
   %conv = bitcast float %a to i32
diff --git a/llvm/test/CodeGen/PowerPC/fma.ll b/llvm/test/CodeGen/PowerPC/fma.ll
index f6c5774..3f1b210 100644
--- a/llvm/test/CodeGen/PowerPC/fma.ll
+++ b/llvm/test/CodeGen/PowerPC/fma.ll
@@ -198,6 +198,9 @@ define float @test_XSNMADDASP(float %A, float %B, float %C) {
 	ret float %F
 ; CHECK-P8-LABEL: test_XSNMADDASP:
 ; CHECK-P8: xsnmaddasp
+
+; CHECK-VSX-LABEL: test_XSNMADDASP:
+; CHECK-VSX: fnmadds
 }
 
 define float @test_XSNMSUBASP(float %A, float %B, float %C) {
@@ -208,4 +211,7 @@ define float @test_XSNMSUBASP(float %A, float %B, float %C) {
 	ret float %F
 ; CHECK-P8-LABEL: test_XSNMSUBASP:
 ; CHECK-P8: xsnmsubasp
+
+; CHECK-VSX-LABEL: test_XSNMSUBASP:
+; CHECK-VSX: fnmsubs
 }
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 59a7d23..351a98c 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -280,8 +280,8 @@ define float @fmul_fma_fast2(float %x) {
 define float @sqrt_afn_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_afn_ieee:
 ; FMF:       # %bb.0:
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
-; FMF-NEXT:    fabs 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
@@ -303,8 +303,8 @@ define float @sqrt_afn_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_afn_ieee:
 ; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
-; GLOBAL-NEXT:    fabs 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
@@ -418,8 +418,8 @@ define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
 define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_fast_ieee:
 ; FMF:       # %bb.0:
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; FMF-NEXT:    fabs 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
@@ -440,8 +440,8 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; GLOBAL-NEXT:    fabs 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
diff --git a/llvm/test/CodeGen/PowerPC/fsub-fneg.ll b/llvm/test/CodeGen/PowerPC/fsub-fneg.ll
index 57b82a3..9658c96 100644
--- a/llvm/test/CodeGen/PowerPC/fsub-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/fsub-fneg.ll
@@ -8,9 +8,9 @@
 define double @neg_ext_op1_extra_use(float %x, double %y) nounwind {
 ; CHECK-LABEL: neg_ext_op1_extra_use:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xsadddp 0, 2, 1
-; CHECK-NEXT:    fneg 1, 1
-; CHECK-NEXT:    xsdivdp 1, 1, 0
+; CHECK-NEXT:    xsnegdp 0, 1
+; CHECK-NEXT:    xsadddp 1, 2, 1
+; CHECK-NEXT:    xsdivdp 1, 0, 1
 ; CHECK-NEXT:    blr
   %t1 = fsub float -0.0, %x
   %t2 = fpext float %t1 to double
-- 
2.7.4