From 910630c1e4a13513eba8e205ebe2cee2a9c18407 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Fri, 3 May 2019 22:37:46 +0000 Subject: [PATCH] [AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT on FPRs This saves us some unnecessary copies. If the inputs to a G_SELECT are floating point, we should use fcsel rather than csel. Changes here are... - Teach selectCopy about s1-to-s1 copies across register banks. - AArch64RegisterBankInfo about G_SELECT in general. - Teach the instruction selector about the FCSEL instructions. Also add two tests: - select-select.mir to show that we get the expected FCSEL - regbank-select.mir (unfortunately named) to show the register banks on G_SELECT are properly preserved And update fast-isel-select.ll to show that we do the same thing as other instruction selectors in these cases. llvm-svn: 359940 --- .../Target/AArch64/AArch64InstructionSelector.cpp | 48 ++++++++++++++-- .../lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 51 +++++++++++------ .../CodeGen/AArch64/GlobalISel/regbank-select.mir | 60 ++++++++++++++++++++ .../CodeGen/AArch64/GlobalISel/select-select.mir | 66 ++++++++++++++++++++++ llvm/test/CodeGen/AArch64/fast-isel-select.ll | 7 +++ 5 files changed, 208 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 76bbd94..bb878ef 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -521,6 +521,36 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, return true; } +/// Helper function to get the source and destination register classes for a +/// copy. Returns a std::pair containing the source register class for the +/// copy, and the destination register class for the copy. If a register class +/// cannot be determined, then it will be nullptr. +static std::pair +getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg = I.getOperand(1).getReg(); + const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); + unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); + + // Special casing for cross-bank copies of s1s. We can technically represent + // a 1-bit value with any size of register. The minimum size for a GPR is 32 + // bits. So, we need to put the FPR on 32 bits as well. + // + // FIXME: I'm not sure if this case holds true outside of copies. If it does, + // then we can pull it into the helpers that get the appropriate class for a + // register bank. Or make a new helper that carries along some constraint + // information. + if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) + SrcSize = DstSize = 32; + + return {getMinClassForRegBank(SrcRegBank, SrcSize, true), + getMinClassForRegBank(DstRegBank, DstSize, true)}; +} + static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { @@ -529,8 +559,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); - const TargetRegisterClass *DstRC = getMinClassForRegBank( - DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true); + + // Find the correct register classes for the source and destination registers. + const TargetRegisterClass *SrcRC; + const TargetRegisterClass *DstRC; + std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); + if (!DstRC) { LLVM_DEBUG(dbgs() << "Unexpected dest size " << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); @@ -563,8 +597,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // a SUBREG_TO_REG. if (I.isCopy()) { // Yes. Check if there's anything to fix up. - const TargetRegisterClass *SrcRC = getMinClassForRegBank( - SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true); if (!SrcRC) { LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); return false; @@ -1724,12 +1756,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I, const unsigned TReg = I.getOperand(2).getReg(); const unsigned FReg = I.getOperand(3).getReg(); + // If we have a floating-point result, then we should use a floating point + // select instead of an integer select. + bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != + AArch64::GPRRegBankID); unsigned CSelOpc = 0; if (Ty == LLT::scalar(32)) { - CSelOpc = AArch64::CSELWr; + CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr; } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { - CSelOpc = AArch64::CSELXr; + CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr; } else { return false; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index dd6f57d..7fdcde44 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -476,6 +476,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + // Helper lambda that returns true if MI has floating point constraints. + auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) { + unsigned Op = MI.getOpcode(); + + // Do we have an explicit floating point instruction? + if (isPreISelGenericFloatingPointOpcode(Op)) + return true; + + // No. Check if we have a copy-like instruction. If we do, then we could + // still be fed by floating point instructions. + if (Op != TargetOpcode::COPY && !MI.isPHI()) + return false; + + // MI is copy-like. Return true if it's using an FPR. + return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == + &AArch64::FPRRegBank; + }; + switch (Opc) { // G_{F|S|U}REM are not listed because they are not legal. // Arithmetic ops. @@ -657,30 +675,27 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } break; + case TargetOpcode::G_SELECT: { + // If the destination is FPR, preserve that. + if (OpRegBankIdx[0] != PMI_FirstGPR) + break; + LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); + if (SrcTy.isVector() || + any_of(MRI.use_instructions(MI.getOperand(0).getReg()), + [&](MachineInstr &MI) { return HasFPConstraints(MI); })) { + // Set the register bank of every operand to FPR. + for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); + Idx < NumOperands; ++Idx) + OpRegBankIdx[Idx] = PMI_FirstFPR; + } + break; + } case TargetOpcode::G_UNMERGE_VALUES: { // If the first operand belongs to a FPR register bank, then make sure that // we preserve that. if (OpRegBankIdx[0] != PMI_FirstGPR) break; - // Helper lambda that returns true if MI has floating point constraints. - auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) { - unsigned Op = MI.getOpcode(); - - // Do we have an explicit floating point instruction? - if (isPreISelGenericFloatingPointOpcode(Op)) - return true; - - // No. Check if we have a copy-like instruction. If we do, then we could - // still be fed by floating point instructions. - if (Op != TargetOpcode::COPY && !MI.isPHI()) - return false; - - // MI is copy-like. Return true if it's using an FPR. - return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == - &AArch64::FPRRegBank; - }; - LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); // UNMERGE into scalars from a vector should always use FPR. // Likewise if any of the uses are FP instructions. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir new file mode 100644 index 0000000..97b5434 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s + +... +--- +name: select_f32 +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $s0, $s1, $w0 + + ; CHECK-LABEL: name: select_f32 + ; CHECK: liveins: $s0, $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; CHECK: $s0 = COPY [[SELECT]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s32) = COPY $s0 + %2:_(s32) = COPY $s1 + %4:_(s32) = G_SELECT %0(s1), %1, %2 + $s0 = COPY %4(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: select_f64 +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $d1, $w0 + + ; CHECK-LABEL: name: select_f64 + ; CHECK: liveins: $d0, $d1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; CHECK: $d0 = COPY [[SELECT]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s64) = COPY $d0 + %2:_(s64) = COPY $d1 + %4:_(s64) = G_SELECT %0(s1), %1, %2 + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir new file mode 100644 index 0000000..e3acf29 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +... +--- +name: select_f32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $s0, $s1, $w0 + + ; CHECK-LABEL: name: select_f32 + ; CHECK: liveins: $s0, $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]] + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]] + ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv + ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv + ; CHECK: $s0 = COPY [[FCSELSrrr]] + ; CHECK: RET_ReallyLR implicit $s0 + %3:gpr(s32) = COPY $w0 + %0:gpr(s1) = G_TRUNC %3(s32) + %1:fpr(s32) = COPY $s0 + %2:fpr(s32) = COPY $s1 + %5:fpr(s1) = COPY %0(s1) + %4:fpr(s32) = G_SELECT %5(s1), %1, %2 + $s0 = COPY %4(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: select_f64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $d1, $w0 + + ; CHECK-LABEL: name: select_f64 + ; CHECK: liveins: $d0, $d1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]] + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]] + ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv + ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv + ; CHECK: $d0 = COPY [[FCSELDrrr]] + ; CHECK: RET_ReallyLR implicit $d0 + %3:gpr(s32) = COPY $w0 + %0:gpr(s1) = G_TRUNC %3(s32) + %1:fpr(s64) = COPY $d0 + %2:fpr(s64) = COPY $d1 + %5:fpr(s1) = COPY %0(s1) + %4:fpr(s64) = G_SELECT %5(s1), %1, %2 + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll index e06f74c..30ad4b8 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL ; First test the different supported value types for select. define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) { @@ -46,6 +47,9 @@ define float @select_f32(i1 zeroext %c, float %a, float %b) { ; CHECK-LABEL: select_f32 ; CHECK: {{cmp w0, #0|tst w0, #0x1}} ; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne +; GISEL-LABEL: select_f32 +; GISEL: {{cmp w0, #0|tst w0, #0x1}} +; GISEL-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne %1 = select i1 %c, float %a, float %b ret float %1 } @@ -54,6 +58,9 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) { ; CHECK-LABEL: select_f64 ; CHECK: {{cmp w0, #0|tst w0, #0x1}} ; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne +; GISEL-LABEL: select_f64 +; GISEL: {{cmp w0, #0|tst w0, #0x1}} +; GISEL-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne %1 = select i1 %c, double %a, double %b ret double %1 } -- 2.7.4