[ARM] Do not fuse VADD and VMUL on the Cortex-M4 and Cortex-M33

author Sjoerd Meijer <sjoerd.meijer@arm.com>

Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)

committer Sjoerd Meijer <sjoerd.meijer@arm.com>

Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)
author Sjoerd Meijer <sjoerd.meijer@arm.com>
Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)
committer Sjoerd Meijer <sjoerd.meijer@arm.com>
Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td

index c42a4eb..62a32ac 100644 (file)
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -966,6 +966,7 @@ def : ProcessorModel<"cortex-m4", CortexM3Model,        [ARMv7em,
                                                           FeatureVFPOnlySP,
                                                           FeatureD16,
                                                           FeaturePrefLoopAlign32,
+                                                         FeatureHasSlowFPVMLx,
                                                           FeatureHasNoBranchPredictor]>;
  
  def : ProcNoItin<"cortex-m7",                           [ARMv7em,
@@ -981,6 +982,7 @@ def : ProcessorModel<"cortex-m33", CortexM3Model,       [ARMv8mMainline,
                                                           FeatureD16,
                                                           FeatureVFPOnlySP,
                                                           FeaturePrefLoopAlign32,
+                                                         FeatureHasSlowFPVMLx,
                                                           FeatureHasNoBranchPredictor]>;
  
  def : ProcNoItin<"cortex-a32",                           [ARMv8a,
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td

index 57e515c..5342b99 100644 (file)
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -353,10 +353,10 @@ def UseNegativeImmediates :
  let RecomputePerFunction = 1 in {
    def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
    def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
-  def UseMovtInPic          : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic      : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+  def UseMovtInPic     : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+  def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx() || MF->getFunction().optForMinSize()">;
  }
-def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
  def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
  
  // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
diff --git a/llvm/test/CodeGen/ARM/fmacs.ll b/llvm/test/CodeGen/ARM/fmacs.ll

index aa49270..027991e 100644 (file)
--- a/llvm/test/CodeGen/ARM/fmacs.ll
+++ b/llvm/test/CodeGen/ARM/fmacs.ll
@@ -3,6 +3,8 @@
  ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
  ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
  ; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard %s -o - | FileCheck %s -check-prefix=HARD
+; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard %s -o - | FileCheck %s -check-prefix=VMLA
+; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-m33 -float-abi=hard %s -o - | FileCheck %s -check-prefix=VMLA
  
  define float @t1(float %acc, float %a, float %b) {
  entry:
@@ -15,6 +17,21 @@ entry:
  ; A8-LABEL: t1:
  ; A8: vmul.f32
  ; A8: vadd.f32
+
+; VMLA-LABEL: t1:
+; VMLA:       vmul.f32
+; VMLA-NEXT:  vadd.f32
+
+  %0 = fmul float %a, %b
+  %1 = fadd float %acc, %0
+       ret float %1
+}
+
+define float @vlma_minsize(float %acc, float %a, float %b) #0 {
+entry:
+; VMLA-LABEL: vlma_minsize:
+; VMLA:       vmla.f32  s0, s1, s2
+
    %0 = fmul float %a, %b
    %1 = fadd float %acc, %0
         ret float %1
@@ -102,3 +119,5 @@ entry:
    %3 = fadd float %1, %2
    ret float %3
  }
+
+attributes #0 = { minsize nounwind optsize }
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll

index 847aeac..8ee2af0 100644 (file)
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -1,5 +1,6 @@
  ; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m33                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8  -check-prefix=VMLA
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA
@@ -188,8 +189,6 @@ define float @round_f(float %a) {
    ret float %1
  }
  
-; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd?
-; (these should be equivalent, even the rounding is the same)
  declare float     @llvm.fmuladd.f32(float %a, float %b, float %c)
  define float @fmuladd_f(float %a, float %b, float %c) {
  ; CHECK-LABEL: fmuladd_f:
author	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Mon, 24 Sep 2018 12:02:50 +0000 (12:02 +0000)
llvm/lib/Target/ARM/ARM.td		patch \| blob \| history
llvm/lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/ARM/fmacs.ll		patch \| blob \| history
llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll		patch \| blob \| history