[ARM] Do not fuse VADD and VMUL, continued (2/2)

author Sjoerd Meijer <sjoerd.meijer@arm.com>
Wed, 17 Oct 2018 10:05:44 +0000 (10:05 +0000)
committer Sjoerd Meijer <sjoerd.meijer@arm.com>
Wed, 17 Oct 2018 10:05:44 +0000 (10:05 +0000)
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td

index 529446c..fc8ed95 100644 (file)
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -365,12 +365,14 @@ let RecomputePerFunction = 1 in {
  def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
  
  // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
  // Do not use them for Darwin platforms.
  def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
                                   " FPOpFusion::Fast && "
                                   " Subtarget->hasVFP4()) && "
-                                 "!Subtarget->isTargetDarwin()">;
+                                 "!Subtarget->isTargetDarwin() &&"
+                                 "Subtarget->useFPVMLx()">;
  
  def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
  def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
diff --git a/llvm/test/CodeGen/ARM/fusedMAC.ll b/llvm/test/CodeGen/ARM/fusedMAC.ll

index 6f6cdc1..6b92289 100644 (file)
--- a/llvm/test/CodeGen/ARM/fusedMAC.ll
+++ b/llvm/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,8 @@
  ; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m7  -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m4  -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m33 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
+
  ; Check generated fused MAC and MLS.
  
  define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -12,6 +16,11 @@ define double @fusedMACTest1(double %d1, double %d2, double %d3) {
  define float @fusedMACTest2(float %f1, float %f2, float %f3) {
  ;CHECK-LABEL: fusedMACTest2:
  ;CHECK: vfma.f32
+
+;DONT-FUSE-LABEL: fusedMACTest2:
+;DONT-FUSE:       vmul.f32
+;DONT-FUSE-NEXT:  vadd.f32
+
    %1 = fmul float %f1, %f2
    %2 = fadd float %1, %f3
    ret float %2
author	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Wed, 17 Oct 2018 10:05:44 +0000 (10:05 +0000)
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Wed, 17 Oct 2018 10:05:44 +0000 (10:05 +0000)
llvm/lib/Target/ARM/ARMInstrInfo.td		patch | blob | history
llvm/test/CodeGen/ARM/fusedMAC.ll		patch | blob | history