From a86bbe1e3191800d42abf073a060eb8601b8be37 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 31 Aug 2021 17:22:39 -0700 Subject: [PATCH] [AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code. When we have an any-extending FPR bank load, none of the tablegen patterns match and we fall back to the C++ selector. Like with the truncating stores that were fixed recently, the C++ code wasn't able to handle it and ended up generating invalid copies between register classes of different sizes. This change adds handling for this case, splitting the load into a regular load and a SUBREG_TO_REG to extend it into the original wide destination reg. --- .../AArch64/GISel/AArch64InstructionSelector.cpp | 24 ++++++++ .../CodeGen/AArch64/GlobalISel/select-load.mir | 68 ++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index c2951c4..930f836 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2764,6 +2764,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { .getReg(0); RBI.constrainGenericRegister(Copy, *RC, MRI); LdSt.getOperand(0).setReg(Copy); + } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { + // If this is an any-extending load from the FPR bank, split it into a regular + // load + extend. + if (RB.getID() == AArch64::FPRRegBankID) { + unsigned SubReg; + LLT MemTy = LdSt.getMMO().getMemoryType(); + auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); + if (!getSubRegForClass(RC, TRI, SubReg)) + return false; + Register OldDst = LdSt.getReg(0); + Register NewDst = + MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType()); + LdSt.getOperand(0).setReg(NewDst); + MRI.setRegBank(NewDst, RB); + // Generate a SUBREG_TO_REG to extend it. 
+ MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator())); + MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) + .addImm(0) + .addUse(NewDst) + .addImm(SubReg); + auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI); + RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI); + MIB.setInstr(LdSt); + } } // Helper lambda for partially selecting I. Either returns the original diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir index 4339005..f313e08 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -38,6 +38,8 @@ define void @load_4xi32(<4 x i32>* %ptr) { ret void } define void @load_8xi16(<8 x i16>* %ptr) { ret void } define void @load_16xi8(<16 x i8>* %ptr) { ret void } + define void @anyext_on_fpr() { ret void } + define void @anyext_on_fpr8() { ret void } ... @@ -638,3 +640,69 @@ body: | RET_ReallyLR implicit $q0 ... +--- +name: anyext_on_fpr +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$w3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $w3, $x0, $x1, $x2 + + ; CHECK-LABEL: name: anyext_on_fpr + ; CHECK: liveins: $w3, $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16)) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRHui]], %subreg.hsub + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] + ; CHECK: $w0 = COPY [[COPY1]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %16:fpr(s32) = G_LOAD %0(p0) :: (load (s16)) + %24:gpr(s32) = COPY %16(s32) + $w0 = COPY %24(s32) + RET_ReallyLR + +... 
+--- +name: anyext_on_fpr8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$w3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $w3, $x0, $x1, $x2 + + ; CHECK-LABEL: name: anyext_on_fpr8 + ; CHECK: liveins: $w3, $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8)) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRBui]], %subreg.bsub + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] + ; CHECK: $w0 = COPY [[COPY1]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %16:fpr(s32) = G_LOAD %0(p0) :: (load (s8)) + %24:gpr(s32) = COPY %16(s32) + $w0 = COPY %24(s32) + RET_ReallyLR + +... -- 2.7.4