From 501f6a4e9e60fe54345f687d5d38f1cab52517ad Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Thu, 4 Mar 2021 10:18:31 -0800
Subject: [PATCH] [AArch64][GlobalISel][RegBankSelect] Improve rbs of
 G_BUILD_VECTOR when fed by fp values.

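This is actually two changes. One is to avoid copies when FP values are fed into
a G_BUILD_VECTOR but we cannot tell that they are FP from the opcode of the
defining instruction alone: we now also check whether the source register has
already been assigned to the FPR bank.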

The other is that G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC are now also marked as
only defining FP (in onlyDefinesFP), since they produce vector results.

Differential Revision: https://reviews.llvm.org/D97968
---
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp      |  5 ++-
 .../GlobalISel/regbankselect-build-vector.mir      | 40 ++++++++++++++++++++++
 llvm/test/CodeGen/AArch64/combine-loads.ll         | 10 +++---
 3 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index cbc027c..7410c76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -527,6 +527,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
   case TargetOpcode::G_UITOFP:
   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
   case TargetOpcode::G_INSERT_VECTOR_ELT:
+  case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
     return true;
   default:
     break;
@@ -880,7 +882,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         }))
       break;
     if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
-        SrcTy.getSizeInBits() < 32) {
+        SrcTy.getSizeInBits() < 32 ||
+        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
       // Have a floating point op.
       // Make sure every operand gets mapped to a FPR register class.
       unsigned NumOperands = MI.getNumOperands();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
index 3129aec..6a9b309 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
@@ -69,3 +69,43 @@ body:             |
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            fed_by_fp_load
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+  - { reg: '$s0' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $s0, $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: fed_by_fp_load
+    ; CHECK: liveins: $s0, $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 328
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 344
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4)
+    ; CHECK: [[LOAD1:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:fpr(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(p0) = COPY $x0
+    %4:_(s64) = G_CONSTANT i64 328
+    %5:_(p0) = G_PTR_ADD %0, %4(s64)
+    %6:_(s64) = G_CONSTANT i64 344
+    %7:_(p0) = G_PTR_ADD %0, %6(s64)
+    %15:_(s32) = G_LOAD %5(p0) :: (load 4)
+    %20:_(s32) = G_LOAD %7(p0) :: (load 4)
+    %21:_(<2 x s32>) = G_BUILD_VECTOR %15(s32), %20(s32)
+    $d0 = COPY %21(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll
index c7275ed..be2501b 100644
--- a/llvm/test/CodeGen/AArch64/combine-loads.ll
+++ b/llvm/test/CodeGen/AArch64/combine-loads.ll
@@ -4,11 +4,13 @@
 define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
 ; CHECK-LABEL: z:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    ldr x8, [x0, #8]
+; CHECK-NEXT:    ldr d1, [x0]
+; CHECK-NEXT:    ldr d2, [x0, #8]
 ; CHECK-NEXT:    // implicit-def: $q0
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // implicit-def: $q1
+; CHECK-NEXT:    mov v1.16b, v2.16b
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %b = load i64, i64* %p
   %p2 = getelementptr i64, i64* %p, i64 1
-- 
2.7.4