Enabling the generation of dependency breakers for partial updates on Cortex-A15...

author Silviu Baranga <silviu.baranga@arm.com>

Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)

committer Silviu Baranga <silviu.baranga@arm.com>

Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)
author Silviu Baranga <silviu.baranga@arm.com>
Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)
committer Silviu Baranga <silviu.baranga@arm.com>
Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

index ed8b9cd..126f160 100644 (file)
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3734,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
    if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
      return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
  
-  // A9-like cores are particularly picky about mixing the two and want these
+  // CortexA9 is particularly picky about mixing the two and wants these
    // converted.
-  if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
        (MI->getOpcode() == ARM::VMOVRS ||
         MI->getOpcode() == ARM::VMOVSR ||
         MI->getOpcode() == ARM::VMOVS))
@@ -4023,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
  //
  // FCONSTD can be used as a dependency-breaking instruction.
-
-
  unsigned ARMBaseInstrInfo::
  getPartialRegUpdateClearance(const MachineInstr *MI,
                               unsigned OpNum,
                               const TargetRegisterInfo *TRI) const {
-  // Only Swift has partial register update problems.
-  if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+  if (!SwiftPartialUpdateClearance ||
+      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
      return 0;
  
    assert(TRI && "Need TRI instance");
@@ -4056,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
  
      // Explicitly reads the dependency.
    case ARM::VLD1LNd32:
-    UseOp = 1;
+    UseOp = 3;
      break;
    default:
      return 0;
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp

index b0f9e56..42c7d2c 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -185,8 +185,7 @@ bool ARMPassConfig::addPreSched2() {
        addPass(createARMLoadStoreOptimizationPass());
        printAndVerify("After ARM load / store optimizer");
      }
-    if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
-      getARMSubtarget().hasNEON())
+    if (getARMSubtarget().hasNEON())
        addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
    }
  
diff --git a/llvm/test/CodeGen/ARM/a15-SD-dep.ll b/llvm/test/CodeGen/ARM/a15-SD-dep.ll

index 17e3eba..a52468e 100644 (file)
--- a/llvm/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/llvm/test/CodeGen/ARM/a15-SD-dep.ll
@@ -5,7 +5,7 @@
  ; CHECK-DISABLED: t1:
  define <2 x float> @t1(float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
-  ; CHECK-DISABLED: vmov.32 d0[1], r{{.}}
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
    %i1 = insertelement <2 x float> undef, float %f, i32 1
    %i2 = fadd <2 x float> %i1, %i1
    ret <2 x float> %i2
@@ -15,7 +15,7 @@ define <2 x float> @t1(float %f) {
  ; CHECK-DISABLED: t2:
  define <4 x float> @t2(float %g, float %f) {
    ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
-  ; CHECK-DISABLED: vmov.32 d0[1], r{{.}}
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
    %i1 = insertelement <4 x float> undef, float %f, i32 1
    %i2 = fadd <4 x float> %i1, %i1
    ret <4 x float> %i2
@@ -25,6 +25,7 @@ define <4 x float> @t2(float %g, float %f) {
  ; CHECK-DISABLED: t3:
  define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
    ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] 
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
    %i1 = insertelement <2 x float> undef, float %f, i32 1
    %i2 = fadd <2 x float> %i1, %i1
    ret <2 x float> %i2
diff --git a/llvm/test/CodeGen/ARM/a15-partial-update.ll b/llvm/test/CodeGen/ARM/a15-partial-update.ll

new file mode 100644 (file)

index 0000000..6306790
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/a15-partial-update.ll
@@ -0,0 +1,38 @@
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s  | FileCheck %s
+
+; CHECK: t1:
+define <2 x float> @t1(float* %A, <2 x float> %B) {
+; The generated code for this test uses a vld1.32 instruction
+; to write lane 1 of a D register containing the value of
+; <2 x float> %B. Since the D register is already defined, it
+; would be incorrect to overwrite it fully (with a vmov.f64)
+; before the vld1.32 instruction. The test checks that no
+; vmov.f64 was generated.
+
+; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}},
+  %tmp2 = load float* %A, align 4
+  %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1
+  ret <2 x float> %tmp3
+}
+
+; CHECK: t2:
+define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) {
+entry:
+  br label %loop
+loop:
+; The code generated by this test uses a vld1.32 instruction.
+; We check that a dependency-breaking vmov* instruction was
+; generated.
+
+; CHECK: vmov.{{.*}} d{{[0-9]+}},
+  %oldcount = phi i32 [0, %entry], [%newcount, %loop]
+  %newcount = add i32 %oldcount, 1
+  %p1 = getelementptr <4 x i8> *%in, i32 %newcount
+  %p2 = getelementptr <4 x i8> *%out, i32 %newcount
+  %tmp1 = load <4 x i8> *%p1, align 4
+  store <4 x i8> %tmp1, <4 x i8> *%p2
+  %cmp = icmp eq i32 %newcount, %n
+  br i1 %cmp, label %loop, label %ret
+ret:
+  ret void
+}
author	Silviu Baranga <silviu.baranga@arm.com>
	Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)
committer	Silviu Baranga <silviu.baranga@arm.com>
	Wed, 27 Mar 2013 12:38:44 +0000 (12:38 +0000)
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMTargetMachine.cpp		patch \| blob \| history
llvm/test/CodeGen/ARM/a15-SD-dep.ll		patch \| blob \| history
llvm/test/CodeGen/ARM/a15-partial-update.ll	[new file with mode: 0644]	patch \| blob