[ARM] Add new feature to enable optimizing the VFP registers

author Evandro Menezes <e.menezes@samsung.com>

Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)

committer Evandro Menezes <e.menezes@samsung.com>

Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)
author Evandro Menezes <e.menezes@samsung.com>
Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)
committer Evandro Menezes <e.menezes@samsung.com>
Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp

index b7f7cb2..be88fe4 100644 (file)
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -660,8 +660,9 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
    const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
    // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
    // enabled when NEON is available.
-  if (!(STI.isCortexA15() && STI.hasNEON()))
+  if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
      return false;
+
    TII = STI.getInstrInfo();
    TRI = STI.getRegisterInfo();
    MRI = &Fn.getRegInfo();
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td

index 3b2136a..742b355 100644 (file)
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -195,6 +195,13 @@ def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
                                               "DontWidenVMOVS", "true",
                                               "Don't widen VMOVS to VMOVD">;
  
+// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
+// VFP register widths.
+def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
+                                             "SplatVFPToNeon", "true",
+                                             "Splat register from VFP to NEON",
+                                             [FeatureDontWidenVMOVS]>;
+
  // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
  def FeatureExpandMLx      : SubtargetFeature<"expand-fp-mlx",
                                               "ExpandMLx", "true",
@@ -819,6 +826,7 @@ def : ProcessorModel<"cortex-a12",  CortexA9Model,      [ARMv7a, ProcA12,
  
  def : ProcessorModel<"cortex-a15",  CortexA9Model,      [ARMv7a, ProcA15,
                                                           FeatureDontWidenVMOVS,
+                                                         FeatureSplatVFPToNeon,
                                                           FeatureHasRetAddrStack,
                                                           FeatureMuxedUnits,
                                                           FeatureTrustZone,
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h

index 93856e3..1650779 100644 (file)
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -352,9 +352,12 @@ protected:
    /// If true, the AGU and NEON/FPU units are multiplexed.
    bool HasMuxedUnits = false;
  
-  /// If true, VMOVS will never be widened to VMOVD
+  /// If true, VMOVS will never be widened to VMOVD.
    bool DontWidenVMOVS = false;
  
+  /// If true, splat a register between VFP and NEON instructions.
+  bool SplatVFPToNeon = false;
+
    /// If true, run the MLx expansion pass.
    bool ExpandMLx = false;
  
@@ -591,6 +594,7 @@ public:
    bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
    bool hasMuxedUnits() const { return HasMuxedUnits; }
    bool dontWidenVMOVS() const { return DontWidenVMOVS; }
+  bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
    bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
    bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
    bool nonpipelinedVFP() const { return NonpipelinedVFP; }
diff --git a/llvm/test/CodeGen/ARM/a15-SD-dep.ll b/llvm/test/CodeGen/ARM/a15-SD-dep.ll

index 625c40e..d0edccb 100644 (file)
--- a/llvm/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/llvm/test/CodeGen/ARM/a15-SD-dep.ll
@@ -1,8 +1,8 @@
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s  | FileCheck -check-prefix=CHECK-DISABLED %s
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-ENABLED %s
+; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs -disable-a15-sd-optimization < %s | FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
+; RUN: llc -O1 -mattr=-splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs                              < %s | FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
+; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs                              < %s | FileCheck -check-prefixes=CHECK,CHECK-ENABLED  %s
  
-; CHECK-ENABLED-LABEL: t1:
-; CHECK-DISABLED-LABEL: t1:
+; CHECK-LABEL: t1:
  define <2 x float> @t1(float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
    ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -11,8 +11,7 @@ define <2 x float> @t1(float %f) {
    ret <2 x float> %i2
  }
  
-; CHECK-ENABLED-LABEL: t2:
-; CHECK-DISABLED-LABEL: t2:
+; CHECK-LABEL: t2:
  define <4 x float> @t2(float %g, float %f) {
    ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
    ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -21,8 +20,7 @@ define <4 x float> @t2(float %g, float %f) {
    ret <4 x float> %i2
  }
  
-; CHECK-ENABLED-LABEL: t3:
-; CHECK-DISABLED-LABEL: t3:
+; CHECK-LABEL: t3:
  define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] 
    ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -31,8 +29,7 @@ define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
    ret <2 x float> %i2
  }
  
-; CHECK-ENABLED-LABEL: t4:
-; CHECK-DISABLED-LABEL: t4:
+; CHECK-LABEL: t4:
  define <2 x float> @t4(float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
    ; CHECK-DISABLED-NOT: vdup
@@ -45,8 +42,7 @@ b:
    ret <2 x float> %i2
  }
  
-; CHECK-ENABLED-LABEL: t5:
-; CHECK-DISABLED-LABEL: t5:
+; CHECK-LABEL: t5:
  define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
    ; CHECK-ENABLED: vadd.f32
@@ -58,8 +54,7 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
  }
  
  ; Test that DPair can be successfully passed as QPR.
-; CHECK-ENABLED-LABEL: test_DPair1:
-; CHECK-DISABLED-LABEL: test_DPair1:
+; CHECK-LABEL: test_DPair1:
  define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
  entry:
    %0 = insertelement <4 x float> undef, float %x, i32 1
@@ -89,8 +84,7 @@ sw.epilog:                                        ; preds = %entry
    ret void
  }
  
-; CHECK-ENABLED-LABEL: test_DPair2:
-; CHECK-DISABLED-LABEL: test_DPair2:
+; CHECK-LABEL: test_DPair2:
  define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
  entry:
    %0 = insertelement <4 x float> undef, float %x, i32 0
author	Evandro Menezes <e.menezes@samsung.com>
	Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)
committer	Evandro Menezes <e.menezes@samsung.com>
	Fri, 20 Jul 2018 16:49:28 +0000 (16:49 +0000)
llvm/lib/Target/ARM/A15SDOptimizer.cpp		patch | blob | history
llvm/lib/Target/ARM/ARM.td		patch | blob | history
llvm/lib/Target/ARM/ARMSubtarget.h		patch | blob | history
llvm/test/CodeGen/ARM/a15-SD-dep.ll		patch | blob | history