From 1fbe9bcab479522e7704716f0f57c7c09d27900a Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Fri, 15 Apr 2016 18:09:10 +0000
Subject: [PATCH] [AArch64] Add load/store pair instructions to getMemOpBaseRegImmOfsWidth().

This improves AA in the MI scheduler when reasoning about paired instructions.

Phabricator Revision: http://reviews.llvm.org/D17098
PR26358

llvm-svn: 266462
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 51 +++++++++++++++++++---
 .../AArch64/arm64-alloc-no-stack-realign.ll  |  2 +-
 llvm/test/CodeGen/AArch64/arm64-stp-aa.ll    | 34 +++++++++++++++
 3 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 91b2e14..0ad1ddf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1466,9 +1466,15 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     const TargetRegisterInfo *TRI) const {
   assert(LdSt->mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt->getNumExplicitOperands() != 3)
-    return false;
-  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+  if (LdSt->getNumExplicitOperands() == 3) {
+    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+      return false;
+  } else if (LdSt->getNumExplicitOperands() == 4) {
+    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isReg() || !LdSt->getOperand(3).isImm())
+      return false;
+  } else
     return false;
 
   // Offset is calculated as the immediate operand multiplied by the scaling factor.
@@ -1515,16 +1521,45 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     Width = 1;
     Scale = 1;
     break;
+  case AArch64::LDPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STPQi:
+  case AArch64::STNPQi:
+    Scale = 16;
+    Width = 32;
+    break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
     Scale = Width = 16;
     break;
+  case AArch64::LDPXi:
+  case AArch64::LDPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STPXi:
+  case AArch64::STPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+    Scale = 8;
+    Width = 16;
+    break;
   case AArch64::LDRXui:
   case AArch64::LDRDui:
   case AArch64::STRXui:
   case AArch64::STRDui:
     Scale = Width = 8;
     break;
+  case AArch64::LDPWi:
+  case AArch64::LDPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STPWi:
+  case AArch64::STPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+    Scale = 4;
+    Width = 8;
+    break;
   case AArch64::LDRWui:
   case AArch64::LDRSui:
   case AArch64::LDRSWui:
@@ -1546,8 +1581,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     break;
   }
 
-  BaseReg = LdSt->getOperand(1).getReg();
-  Offset = LdSt->getOperand(2).getImm() * Scale;
+  if (LdSt->getNumExplicitOperands() == 3) {
+    BaseReg = LdSt->getOperand(1).getReg();
+    Offset = LdSt->getOperand(2).getImm() * Scale;
+  } else {
+    assert(LdSt->getNumExplicitOperands() == 4 && "invalid number of operands");
+    BaseReg = LdSt->getOperand(2).getReg();
+    Offset = LdSt->getOperand(3).getImm() * Scale;
+  }
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index bf2d2cf..71bf203 100644
--- a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false -enable-post-misched=false | FileCheck %s
 ; rdar://12713765
 
 ; Make sure we are not creating stack objects that are assumed to be 64-byte
diff --git a/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll b/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
index 82d343d..2a45745 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
@@ -109,3 +109,37 @@ define double @stp_double_aa_after(double %d0, double %a, double %b, double* noc
   store double %b, double* %add.ptr, align 8
   ret double %tmp
 }
+
+; Check that the stores %c and %e are paired after the fadd instruction,
+; and then the stores %a and %b are paired after proving that they do not
+; depend on the (%c, %e) pair.
+;
+; CHECK-LABEL: st1:
+; CHECK: stp q0, q1, [x{{[0-9]+}}]
+; CHECK: fadd
+; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
+define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
+entry:
+  %a0 = getelementptr inbounds float, float* %base, i64 %index
+  %b0 = getelementptr float, float* %a0, i64 4
+  %c0 = getelementptr float, float* %a0, i64 8
+  %d0 = getelementptr float, float* %a0, i64 12
+
+  %a1 = bitcast float* %a0 to <4 x float>*
+  %b1 = bitcast float* %b0 to <4 x float>*
+  %c1 = bitcast float* %c0 to <4 x float>*
+  %d1 = bitcast float* %d0 to <4 x float>*
+
+  store <4 x float> %c, <4 x float>* %c1, align 4
+  store <4 x float> %a, <4 x float>* %a1, align 4
+
+  ; This fadd forces the compiler to pair %c and %e after the fadd, and leaves
+  ; the stores %a and %b separated by that stp. The dependence analysis then
+  ; needs to prove that it is safe to move %b past the stp to be paired with %a.
+  %e = fadd fast <4 x float> %d, %a
+
+  store <4 x float> %e, <4 x float>* %d1, align 4
+  store <4 x float> %b, <4 x float>* %b1, align 4
+
+  ret void
+}
-- 
2.7.4
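
Background note (illustration only, not part of the commit): once paired instructions also report a base register, byte offset, and access width, the scheduler's alias query for two accesses off the same base register reduces to a byte-interval overlap test. The standalone C++ sketch below shows that check under stated assumptions; the MemOp struct, the mayOverlap() helper, and the register/offset values are hypothetical and merely mirror the stp/str offsets exercised by the st1 test above, not LLVM's actual data structures or API.

// Standalone illustration (not LLVM code) of the interval-overlap reasoning
// that reporting base register, offset, and width for paired accesses enables.
#include <cstdint>
#include <iostream>

struct MemOp {
  unsigned BaseReg; // base register id (hypothetical encoding)
  int64_t Offset;   // byte offset from the base register
  int64_t Width;    // number of bytes accessed
};

// With a common base register, two accesses are provably disjoint when their
// byte intervals [Offset, Offset + Width) do not intersect. Different base
// registers are treated conservatively as "may overlap".
static bool mayOverlap(const MemOp &A, const MemOp &B) {
  if (A.BaseReg != B.BaseReg)
    return true;
  return A.Offset < B.Offset + B.Width && B.Offset < A.Offset + A.Width;
}

int main() {
  // "stp q2, q0, [x0, #32]": paired 16-byte stores -> offset 32, width 32.
  MemOp PairedStore{/*BaseReg=*/0, /*Offset=*/32, /*Width=*/32};
  // "str q1, [x0, #16]": single 16-byte store -> offset 16, width 16.
  MemOp SingleStore{/*BaseReg=*/0, /*Offset=*/16, /*Width=*/16};
  // [16, 32) and [32, 64) are disjoint, so the single store can safely move
  // past the paired store, which is what lets the second stp form in st1.
  std::cout << (mayOverlap(PairedStore, SingleStore) ? "may overlap"
                                                     : "disjoint")
            << '\n';
  return 0;
}

Reporting Width = 32 for the Q-register pair opcodes (rather than the 16 bytes of a single register) is what makes the interval cover both stored registers, so a check of this shape stays conservative and correct for ldp/stp.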