From 1d0832d9a3ee4f7e19c000bd9adc11991d9dfe23 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 21 Mar 2018 21:28:19 +0000
Subject: [PATCH] [InstCombine] move/add tests for fmul distribution; NFC

There are at least 3 problems:
1. We're distributing across large patterns, but fail to do that for the minimal patterns.
2. We're not checking uses, so we may create more instructions than we eliminate.
3. We should be able to do these transforms with less than full 'fast' fmuls.

llvm-svn: 328152
---
 llvm/test/Transforms/InstCombine/fast-math.ll |  79 -----------
 llvm/test/Transforms/InstCombine/fmul.ll      | 185 ++++++++++++++++++++++++++
 2 files changed, 185 insertions(+), 79 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 96d70db..a2ca21f 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -208,85 +208,6 @@ define float @fold16(float %x, float %y) {
   ret float %r
 }
 
-
-; =========================================================================
-;
-;   Testing-cases about fmul begin
-;
-; =========================================================================
-
-; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
-define float @fmul_distribute1(float %f1) {
-; CHECK-LABEL: @fmul_distribute1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+07
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+07
-; CHECK-NEXT:    ret float [[T3]]
-;
-  %t1 = fmul float %f1, 6.0e+3
-  %t2 = fadd float %t1, 2.0e+3
-  %t3 = fmul fast float %t2, 5.0e+3
-  ret float %t3
-}
-
-; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
-; TODO: We don't convert the fast fdiv to fmul because that would be multiplication
-; by a denormal, but we could do better when we know that denormals are not a problem.
-
-define double @fmul_distribute2(double %f1, double %f2) {
-; CHECK-LABEL: @fmul_distribute2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast double [[F1:%.*]], 0x7FE8000000000000
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], 0x69000000000000
-; CHECK-NEXT:    ret double [[T3]]
-;
-  %t1 = fdiv double %f1, 3.0e+0
-  %t2 = fadd double %t1, 5.0e+1
-  ; 0x10000000000000 = DBL_MIN
-  %t3 = fmul fast double %t2, 0x10000000000000
-  ret double %t3
-}
-
-; 5.0e-1 * DBL_MIN yields denormal, so "(f1*3.0 + 5.0e-1) * DBL_MIN" cannot
-; be simplified into f1 * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN)
-define double @fmul_distribute3(double %f1) {
-; CHECK-LABEL: @fmul_distribute3(
-; CHECK-NEXT:    [[T1:%.*]] = fdiv double [[F1:%.*]], 3.000000e+00
-; CHECK-NEXT:    [[T2:%.*]] = fadd double [[T1]], 5.000000e-01
-; CHECK-NEXT:    [[T3:%.*]] = fmul fast double [[T2]], 0x10000000000000
-; CHECK-NEXT:    ret double [[T3]]
-;
-  %t1 = fdiv double %f1, 3.0e+0
-  %t2 = fadd double %t1, 5.0e-1
-  %t3 = fmul fast double %t2, 0x10000000000000
-  ret double %t3
-}
-
-; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
-define float @fmul_distribute4(float %f1) {
-; CHECK-LABEL: @fmul_distribute4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+07
-; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 1.000000e+07, [[TMP1]]
-; CHECK-NEXT:    ret float [[T3]]
-;
-  %t1 = fmul float %f1, 6.0e+3
-  %t2 = fsub float 2.0e+3, %t1
-  %t3 = fmul fast float %t2, 5.0e+3
-  ret float %t3
-}
-
-; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
-define float @fmul7(float %f1, float %f2) {
-; CHECK-LABEL: @fmul7(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
-; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[MUL]], [[F1]]
-; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL1]], [[MUL]]
-; CHECK-NEXT:    ret float [[ADD]]
-;
-  %mul = fmul float %f1, %f2
-  %mul1 = fmul fast float %mul, %f1
-  %add = fadd float %mul1, %mul
-  ret float %add
-}
-
 ; =========================================================================
 ;
 ;   Testing-cases about negation
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 10ae53e..423d5e6 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -439,3 +439,188 @@ define float @fdiv_constant_denominator_fmul_denorm_try_harder_extra_use(float %
   ret float %r
 }
 
+; FIXME: Distribute to fma form.
+; (X + C1) * C2 --> (X * C2) + C1*C2
+
+define float @fmul_fadd_distribute(float %x) {
+; CHECK-LABEL: @fmul_fadd_distribute(
+; CHECK-NEXT:    [[T2:%.*]] = fadd float [[X:%.*]], 2.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[T2]], 3.000000e+00
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t2 = fadd float %x, 2.0
+  %t3 = fmul fast float %t2, 3.0
+  ret float %t3
+}
+
+; FIXME: Distribute to fma form.
+; (X - C1) * C2 --> (X * C2) - C1*C2
+
+define float @fmul_fsub_distribute1(float %x) {
+; CHECK-LABEL: @fmul_fsub_distribute1(
+; CHECK-NEXT:    [[T2:%.*]] = fadd float [[X:%.*]], -2.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[T2]], 3.000000e+00
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t2 = fsub float %x, 2.0
+  %t3 = fmul fast float %t2, 3.0
+  ret float %t3
+}
+
+; FIXME: Distribute to fma form.
+; (C1 - X) * C2 --> C1*C2 - (X * C2)
+
+define float @fmul_fsub_distribute2(float %x) {
+; CHECK-LABEL: @fmul_fsub_distribute2(
+; CHECK-NEXT:    [[T2:%.*]] = fsub float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[T2]], 3.000000e+00
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t2 = fsub float 2.0, %x
+  %t3 = fmul fast float %t2, 3.0
+  ret float %t3
+}
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3)
+
+define float @fmul_fadd_fmul_distribute(float %x) {
+; CHECK-LABEL: @fmul_fadd_fmul_distribute(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fadd float %t1, 2.0
+  %t3 = fmul fast float %t2, 5.0
+  ret float %t3
+}
+
+; FIXME: More instructions than we started with.
+
+define float @fmul_fadd_distribute_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fadd_distribute_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT:    [[T2:%.*]] = fadd float [[T1]], 2.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01
+; CHECK-NEXT:    call void @use_f32(float [[T2]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fadd float %t1, 2.0
+  %t3 = fmul fast float %t2, 5.0
+  call void @use_f32(float %t2)
+  ret float %t3
+}
+
+; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
+; 0x10000000000000 = DBL_MIN
+; TODO: We don't convert the fast fdiv to fmul because that would be multiplication
+; by a denormal, but we could do better when we know that denormals are not a problem.
+
+define double @fmul_fadd_fdiv_distribute2(double %x) {
+; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT:    ret double [[T3]]
+;
+  %t1 = fdiv double %x, 3.0
+  %t2 = fadd double %t1, 5.0
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+}
+
+; 5.0e-1 * DBL_MIN yields denormal, so "(f1*3.0 + 5.0e-1) * DBL_MIN" cannot
+; be simplified into f1 * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN)
+
+define double @fmul_fadd_fdiv_distribute3(double %x) {
+; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT:    ret double [[T3]]
+;
+  %t1 = fdiv double %x, 3.0
+  %t2 = fadd double %t1, 5.0
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+}
+
+; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3))
+
+define float @fmul_fsub_fmul_distribute(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]]
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fsub float 2.0, %t1
+  %t3 = fmul fast float %t2, 5.0
+  ret float %t3
+}
+
+; FIXME: More instructions than we started with.
+
+define float @fmul_fsub_fmul_distribute_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT:    [[T2:%.*]] = fsub float 2.000000e+00, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]]
+; CHECK-NEXT:    call void @use_f32(float [[T2]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fsub float 2.0, %t1
+  %t3 = fmul fast float %t2, 5.0
+  call void @use_f32(float %t2)
+  ret float %t3
+}
+
+; ((X*C1) - C2) * C3 => (X * (C1*C3)) - C2*C3
+
+define float @fmul_fsub_fmul_distribute2(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fsub float %t1, 2.0
+  %t3 = fmul fast float %t2, 5.0
+  ret float %t3
+}
+
+; FIXME: More instructions than we started with.
+
+define float @fmul_fsub_fmul_distribute2_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute2_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT:    [[T2:%.*]] = fsub float 2.000000e+00, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X]], 3.000000e+01
+; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]]
+; CHECK-NEXT:    call void @use_f32(float [[T2]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fmul float %x, 6.0
+  %t2 = fsub float 2.0, %t1
+  %t3 = fmul fast float %t2, 5.0
+  call void @use_f32(float %t2)
+  ret float %t3
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+
+define float @common_factor(float %x, float %y) {
+; CHECK-LABEL: @common_factor(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[MUL]], [[X]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL1]], [[MUL]]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %mul = fmul float %x, %y
+  %mul1 = fmul fast float %mul, %x
+  %add = fadd float %mul1, %mul
+  ret float %add
+}
+
-- 
2.7.4