From 501f6a4e9e60fe54345f687d5d38f1cab52517ad Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 4 Mar 2021 10:18:31 -0800 Subject: [PATCH] [AArch64][GlobalISel][RegBankSelect] Improve rbs of G_BUILD_VECTOR when fed by fp values. This is actually two changes. One is to avoid copies when fp values are fed into a build_vector but that cannot be determined from the opcode of the defining instruction alone. The other is that build_vectors are also marked as only defining FP, since they produce vector results. Differential Revision: https://reviews.llvm.org/D97968 --- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 5 ++- .../GlobalISel/regbankselect-build-vector.mir | 40 ++++++++++++++++++++++ llvm/test/CodeGen/AArch64/combine-loads.ll | 10 +++--- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index cbc027c..7410c76 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -527,6 +527,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, case TargetOpcode::G_UITOFP: case TargetOpcode::G_EXTRACT_VECTOR_ELT: case TargetOpcode::G_INSERT_VECTOR_ELT: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_BUILD_VECTOR_TRUNC: return true; default: break; @@ -880,7 +882,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { })) break; if (isPreISelGenericFloatingPointOpcode(DefOpc) || - SrcTy.getSizeInBits() < 32) { + SrcTy.getSizeInBits() < 32 || + getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) { // Have a floating point op. // Make sure every operand gets mapped to a FPR register class. 
unsigned NumOperands = MI.getNumOperands(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir index 3129aec..6a9b309 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir @@ -69,3 +69,43 @@ body: | RET_ReallyLR implicit $q0 ... +--- +name: fed_by_fp_load +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$s0' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $s0, $x0, $x1, $x2 + + ; CHECK-LABEL: name: fed_by_fp_load + ; CHECK: liveins: $s0, $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 328 + ; CHECK: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 344 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4) + ; CHECK: [[LOAD1:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:fpr(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(p0) = COPY $x0 + %4:_(s64) = G_CONSTANT i64 328 + %5:_(p0) = G_PTR_ADD %0, %4(s64) + %6:_(s64) = G_CONSTANT i64 344 + %7:_(p0) = G_PTR_ADD %0, %6(s64) + %15:_(s32) = G_LOAD %5(p0) :: (load 4) + %20:_(s32) = G_LOAD %7(p0) :: (load 4) + %21:_(<2 x s32>) = G_BUILD_VECTOR %15(s32), %20(s32) + $d0 = COPY %21(<2 x s32>) + RET_ReallyLR implicit $d0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll index c7275ed..be2501b 100644 --- a/llvm/test/CodeGen/AArch64/combine-loads.ll +++ b/llvm/test/CodeGen/AArch64/combine-loads.ll @@ -4,11 +4,13 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) { ; CHECK-LABEL: z: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x0, #8] ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: // implicit-def: $q1 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %b = load i64, i64* %p %p2 = getelementptr i64, i64* %p, i64 1 -- 2.7.4