[PowerPC] add tests for FMF propagation; NFC

author Sanjay Patel <spatel@rotateright.com>

Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)
author Sanjay Patel <spatel@rotateright.com>
Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll

new file mode 100644 (file)

index 0000000..21d367a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -0,0 +1,355 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
+; RUN: llc < %s -mtriple=powerpc64le                                    | FileCheck %s --check-prefix=FMF
+; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math             | FileCheck %s --check-prefix=GLOBAL
+
+; Test FP transforms using instruction/node-level fast-math-flags.
+; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
+; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
+
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.sqrt.f32(float)
+
+; X * Y + Z --> fma(X, Y, Z)
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
+; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd contract {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
+
+define float @fmul_fadd_contract1(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_contract1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmulsp 0, 1, 2
+; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_contract1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, %y
+  %add = fadd contract float %mul, %z
+  ret float %add
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
+; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
+
+define float @fmul_fadd_contract2(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_contract2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_contract2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul contract float %x, %y
+  %add = fadd contract float %mul, %z
+  ret float %add
+}
+
+; Reassociation implies that FMA contraction is allowed.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
+; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
+
+define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_reassoc1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmulsp 0, 1, 2
+; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_reassoc1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, %y
+  %add = fadd reassoc float %mul, %z
+  ret float %add
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
+; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
+
+define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_reassoc2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmulsp 0, 1, 2
+; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_reassoc2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul reassoc float %x, %y
+  %add = fadd reassoc float %mul, %z
+  ret float %add
+}
+
+; The fadd is now fully 'fast'. This implies that contraction is allowed.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
+; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
+
+define float @fmul_fadd_fast1(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_fast1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_fast1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul fast float %x, %y
+  %add = fadd fast float %mul, %z
+  ret float %add
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
+; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
+
+define float @fmul_fadd_fast2(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_fast2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_fast2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul fast float %x, %y
+  %add = fadd fast float %mul, %z
+  ret float %add
+}
+
+; fma(X, 7.0, X * 42.0) --> X * 49.0
+; This is the minimum FMF needed for this transform - the FMA allows reassociation.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
+; FMFDEBUG:         fma {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
+
+define float @fmul_fma_reassoc1(float %x) {
+; FMF-LABEL: fmul_fma_reassoc1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI6_0@toc@l
+; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI6_1@toc@l
+; FMF-NEXT:    lfsx 2, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_reassoc1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; GLOBAL-NEXT:    addi 3, 3, .LCPI6_0@toc@l
+; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, 42.0
+  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
+; FMFDEBUG:         fma {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
+
+define float @fmul_fma_reassoc2(float %x) {
+; FMF-LABEL: fmul_fma_reassoc2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI7_0@toc@l
+; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI7_1@toc@l
+; FMF-NEXT:    lfsx 2, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_reassoc2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; GLOBAL-NEXT:    addi 3, 3, .LCPI7_0@toc@l
+; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul reassoc float %x, 42.0
+  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; The FMA is now fully 'fast'. This implies that reassociation is allowed.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
+; FMFDEBUG:         fma {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
+
+define float @fmul_fma_fast1(float %x) {
+; FMF-LABEL: fmul_fma_fast1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI8_1@toc@l
+; FMF-NEXT:    lfsx 2, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_fast1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; GLOBAL-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, 42.0
+  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
+; FMFDEBUG:         fma {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
+
+define float @fmul_fma_fast2(float %x) {
+; FMF-LABEL: fmul_fma_fast2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI9_0@toc@l
+; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; FMF-NEXT:    addi 3, 3, .LCPI9_1@toc@l
+; FMF-NEXT:    lfsx 2, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_fast2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; GLOBAL-NEXT:    addi 3, 3, .LCPI9_0@toc@l
+; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul fast float %x, 42.0
+  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
+; FMFDEBUG:         fsqrt {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'
+
+define float @sqrt_afn(float %x) {
+; FMF-LABEL: sqrt_afn:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xssqrtsp 1, 1
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: sqrt_afn:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xxlxor 0, 0, 0
+; GLOBAL-NEXT:    fcmpu 0, 1, 0
+; GLOBAL-NEXT:    beq 0, .LBB10_2
+; GLOBAL-NEXT:  # %bb.1:
+; GLOBAL-NEXT:    xsrsqrtesp 2, 1
+; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    fmr 4, 1
+; GLOBAL-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; GLOBAL-NEXT:    lfsx 3, 0, 3
+; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
+; GLOBAL-NEXT:    xsmulsp 0, 2, 2
+; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
+; GLOBAL-NEXT:    xsmulsp 0, 2, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:  .LBB10_2:
+; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    blr
+  %rt = call afn float @llvm.sqrt.f32(float %x)
+  ret float %rt
+}
+
+; The call is now fully 'fast'. This implies that approximation is allowed.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
+; FMFDEBUG:         fsqrt {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'
+
+define float @sqrt_fast(float %x) {
+; FMF-LABEL: sqrt_fast:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xssqrtsp 1, 1
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: sqrt_fast:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xxlxor 0, 0, 0
+; GLOBAL-NEXT:    fcmpu 0, 1, 0
+; GLOBAL-NEXT:    beq 0, .LBB11_2
+; GLOBAL-NEXT:  # %bb.1:
+; GLOBAL-NEXT:    xsrsqrtesp 2, 1
+; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    fmr 4, 1
+; GLOBAL-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; GLOBAL-NEXT:    lfsx 3, 0, 3
+; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
+; GLOBAL-NEXT:    xsmulsp 0, 2, 2
+; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
+; GLOBAL-NEXT:    xsmulsp 0, 2, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:  .LBB11_2:
+; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    blr
+  %rt = call fast float @llvm.sqrt.f32(float %x)
+  ret float %rt
+}
+
author	Sanjay Patel <spatel@rotateright.com>
	Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Thu, 3 May 2018 17:41:37 +0000 (17:41 +0000)