From 1fbe9bcab479522e7704716f0f57c7c09d27900a Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Fri, 15 Apr 2016 18:09:10 +0000
Subject: [PATCH] [AArch64] Add load/store pair instructions to getMemOpBaseRegImmOfsWidth().

This improves AA in the MI scheduler when reasoning about paired instructions.

Phabricator Revision: http://reviews.llvm.org/D17098
PR26358

llvm-svn: 266462
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 51 +++++++++++++++++++---
 .../AArch64/arm64-alloc-no-stack-realign.ll  |  2 +-
 llvm/test/CodeGen/AArch64/arm64-stp-aa.ll    | 34 +++++++++++++++
 3 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 91b2e14..0ad1ddf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1466,9 +1466,15 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     const TargetRegisterInfo *TRI) const {
   assert(LdSt->mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt->getNumExplicitOperands() != 3)
-    return false;
-  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+  if (LdSt->getNumExplicitOperands() == 3) {
+    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+      return false;
+  } else if (LdSt->getNumExplicitOperands() == 4) {
+    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isReg() || !LdSt->getOperand(3).isImm())
+      return false;
+  } else
     return false;
 
   // Offset is calculated as the immediate operand multiplied by the scaling factor.
@@ -1515,16 +1521,45 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     Width = 1;
     Scale = 1;
     break;
+  case AArch64::LDPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STPQi:
+  case AArch64::STNPQi:
+    Scale = 16;
+    Width = 32;
+    break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
     Scale = Width = 16;
     break;
+  case AArch64::LDPXi:
+  case AArch64::LDPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STPXi:
+  case AArch64::STPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+    Scale = 8;
+    Width = 16;
+    break;
   case AArch64::LDRXui:
   case AArch64::LDRDui:
   case AArch64::STRXui:
   case AArch64::STRDui:
     Scale = Width = 8;
     break;
+  case AArch64::LDPWi:
+  case AArch64::LDPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STPWi:
+  case AArch64::STPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+    Scale = 4;
+    Width = 8;
+    break;
   case AArch64::LDRWui:
   case AArch64::LDRSui:
   case AArch64::LDRSWui:
@@ -1546,8 +1581,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     break;
   }
 
-  BaseReg = LdSt->getOperand(1).getReg();
-  Offset = LdSt->getOperand(2).getImm() * Scale;
+  if (LdSt->getNumExplicitOperands() == 3) {
+    BaseReg = LdSt->getOperand(1).getReg();
+    Offset = LdSt->getOperand(2).getImm() * Scale;
+  } else {
+    assert(LdSt->getNumExplicitOperands() == 4 && "invalid number of operands");
+    BaseReg = LdSt->getOperand(2).getReg();
+    Offset = LdSt->getOperand(3).getImm() * Scale;
+  }
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index bf2d2cf..71bf203 100644
--- a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false -enable-post-misched=false | FileCheck %s
 ; rdar://12713765
 
 ; Make sure we are not creating stack objects that are assumed to be 64-byte
diff --git a/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll b/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
index 82d343d..2a45745 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp-aa.ll
@@ -109,3 +109,37 @@ define double @stp_double_aa_after(double %d0, double %a, double %b, double* noc
   store double %b, double* %add.ptr, align 8
   ret double %tmp
 }
+
+; Check that the stores %c and %e are paired after the fadd instruction,
+; and then the stores %a and %b are paired after proving that they do not
+; depend on the (%c, %e) pair.
+;
+; CHECK-LABEL: st1:
+; CHECK: stp q0, q1, [x{{[0-9]+}}]
+; CHECK: fadd
+; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
+define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
+entry:
+  %a0 = getelementptr inbounds float, float* %base, i64 %index
+  %b0 = getelementptr float, float* %a0, i64 4
+  %c0 = getelementptr float, float* %a0, i64 8
+  %d0 = getelementptr float, float* %a0, i64 12
+
+  %a1 = bitcast float* %a0 to <4 x float>*
+  %b1 = bitcast float* %b0 to <4 x float>*
+  %c1 = bitcast float* %c0 to <4 x float>*
+  %d1 = bitcast float* %d0 to <4 x float>*
+
+  store <4 x float> %c, <4 x float>* %c1, align 4
+  store <4 x float> %a, <4 x float>* %a1, align 4
+
+  ; This fadd forces the compiler to pair %c and %e after the fadd, and leaves
+  ; the stores %a and %b separated by that stp. The dependence analysis then
+  ; needs to prove that it is safe to move %b past the stp to be paired with %a.
+  %e = fadd fast <4 x float> %d, %a
+
+  store <4 x float> %e, <4 x float>* %d1, align 4
+  store <4 x float> %b, <4 x float>* %b1, align 4
+
+  ret void
+}
-- 
2.7.4
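
Background note (illustration only, not part of the commit): once paired instructions also report a base register, byte offset, and access width, the scheduler's alias query for two accesses off the same base register reduces to a byte-interval overlap test. The standalone C++ sketch below shows that check under stated assumptions; the MemOp struct, the mayOverlap() helper, and the register/offset values are hypothetical and merely mirror the stp/str offsets exercised by the st1 test above, not LLVM's actual data structures or API.

// Standalone illustration (not LLVM code) of the interval-overlap reasoning
// that reporting base register, offset, and width for paired accesses enables.
#include <cstdint>
#include <iostream>

struct MemOp {
  unsigned BaseReg; // base register id (hypothetical encoding)
  int64_t Offset;   // byte offset from the base register
  int64_t Width;    // number of bytes accessed
};

// With a common base register, two accesses are provably disjoint when their
// byte intervals [Offset, Offset + Width) do not intersect. Different base
// registers are treated conservatively as "may overlap".
static bool mayOverlap(const MemOp &A, const MemOp &B) {
  if (A.BaseReg != B.BaseReg)
    return true;
  return A.Offset < B.Offset + B.Width && B.Offset < A.Offset + A.Width;
}

int main() {
  // "stp q2, q0, [x0, #32]": paired 16-byte stores -> offset 32, width 32.
  MemOp PairedStore{/*BaseReg=*/0, /*Offset=*/32, /*Width=*/32};
  // "str q1, [x0, #16]": single 16-byte store -> offset 16, width 16.
  MemOp SingleStore{/*BaseReg=*/0, /*Offset=*/16, /*Width=*/16};
  // [16, 32) and [32, 64) are disjoint, so the single store can safely move
  // past the paired store, which is what lets the second stp form in st1.
  std::cout << (mayOverlap(PairedStore, SingleStore) ? "may overlap"
                                                     : "disjoint")
            << '\n';
  return 0;
}

Reporting Width = 32 for the Q-register pair opcodes (rather than the 16 bytes of a single register) is what makes the interval cover both stored registers, so a check of this shape stays conservative and correct for ldp/stp.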