From 910630c1e4a13513eba8e205ebe2cee2a9c18407 Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Fri, 3 May 2019 22:37:46 +0000
Subject: [PATCH] [AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT
 on FPRs

This saves us some unnecessary copies.

If the inputs to a G_SELECT are floating point, we should use fcsel rather than
csel.

Changes here are...

- Teach selectCopy about s1-to-s1 copies across register banks.
- AArch64RegisterBankInfo about G_SELECT in general.
- Teach the instruction selector about the FCSEL instructions.

Also add two tests:

- select-select.mir to show that we get the expected FCSEL
- regbank-select.mir (unfortunately named) to show the register banks on
G_SELECT are properly preserved

And update fast-isel-select.ll to show that we do the same thing as other
instruction selectors in these cases.

llvm-svn: 359940
---
 .../Target/AArch64/AArch64InstructionSelector.cpp  | 48 ++++++++++++++--
 .../lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 51 +++++++++++------
 .../CodeGen/AArch64/GlobalISel/regbank-select.mir  | 60 ++++++++++++++++++++
 .../CodeGen/AArch64/GlobalISel/select-select.mir   | 66 ++++++++++++++++++++++
 llvm/test/CodeGen/AArch64/fast-isel-select.ll      |  7 +++
 5 files changed, 208 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 76bbd94..bb878ef 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -521,6 +521,36 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
   return true;
 }
 
+/// Helper function to get the source and destination register classes for a
+/// copy. Returns a std::pair containing the source register class for the
+/// copy, and the destination register class for the copy. If a register class
+/// cannot be determined, then it will be nullptr.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+                     const RegisterBankInfo &RBI) {
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  // Special casing for cross-bank copies of s1s. We can technically represent
+  // a 1-bit value with any size of register. The minimum size for a GPR is 32
+  // bits. So, we need to put the FPR on 32 bits as well.
+  //
+  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+  // then we can pull it into the helpers that get the appropriate class for a
+  // register bank. Or make a new helper that carries along some constraint
+  // information.
+  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+    SrcSize = DstSize = 32;
+
+  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+          getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
@@ -529,8 +559,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   unsigned SrcReg = I.getOperand(1).getReg();
   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
-  const TargetRegisterClass *DstRC = getMinClassForRegBank(
-      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
+
+  // Find the correct register classes for the source and destination registers.
+  const TargetRegisterClass *SrcRC;
+  const TargetRegisterClass *DstRC;
+  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
+
   if (!DstRC) {
     LLVM_DEBUG(dbgs() << "Unexpected dest size "
                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
@@ -563,8 +597,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   // a SUBREG_TO_REG.
   if (I.isCopy()) {
     // Yes. Check if there's anything to fix up.
-    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
-        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
     if (!SrcRC) {
       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
       return false;
@@ -1724,12 +1756,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     const unsigned TReg = I.getOperand(2).getReg();
     const unsigned FReg = I.getOperand(3).getReg();
 
+    // If we have a floating-point result, then we should use a floating point
+    // select instead of an integer select.
+    bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+                 AArch64::GPRRegBankID);
     unsigned CSelOpc = 0;
 
     if (Ty == LLT::scalar(32)) {
-      CSelOpc = AArch64::CSELWr;
+      CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
     } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
-      CSelOpc = AArch64::CSELXr;
+      CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
     } else {
       return false;
     }
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index dd6f57d..7fdcde44 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -476,6 +476,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
 
+  // Helper lambda that returns true if MI has floating point constraints.
+  auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
+    unsigned Op = MI.getOpcode();
+
+    // Do we have an explicit floating point instruction?
+    if (isPreISelGenericFloatingPointOpcode(Op))
+      return true;
+
+    // No. Check if we have a copy-like instruction. If we do, then we could
+    // still be fed by floating point instructions.
+    if (Op != TargetOpcode::COPY && !MI.isPHI())
+      return false;
+
+    // MI is copy-like. Return true if it's using an FPR.
+    return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+           &AArch64::FPRRegBank;
+  };
+
   switch (Opc) {
     // G_{F|S|U}REM are not listed because they are not legal.
     // Arithmetic ops.
@@ -657,30 +675,27 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       break;
     }
     break;
+  case TargetOpcode::G_SELECT: {
+    // If the destination is FPR, preserve that.
+    if (OpRegBankIdx[0] != PMI_FirstGPR)
+      break;
+    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    if (SrcTy.isVector() ||
+        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
+      // Set the register bank of every operand to FPR.
+      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+           Idx < NumOperands; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+    }
+    break;
+  }
   case TargetOpcode::G_UNMERGE_VALUES: {
     // If the first operand belongs to a FPR register bank, then make sure that
     // we preserve that.
     if (OpRegBankIdx[0] != PMI_FirstGPR)
       break;
 
-    // Helper lambda that returns true if MI has floating point constraints.
-    auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
-      unsigned Op = MI.getOpcode();
-
-      // Do we have an explicit floating point instruction?
-      if (isPreISelGenericFloatingPointOpcode(Op))
-        return true;
-
-      // No. Check if we have a copy-like instruction. If we do, then we could
-      // still be fed by floating point instructions.
-      if (Op != TargetOpcode::COPY && !MI.isPHI())
-        return false;
-
-      // MI is copy-like. Return true if it's using an FPR.
-      return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
-             &AArch64::FPRRegBank;
-    };
-
     LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
     // UNMERGE into scalars from a vector should always use FPR.
     // Likewise if any of the uses are FP instructions.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
new file mode 100644
index 0000000..97b5434
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
@@ -0,0 +1,60 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            select_f32
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $s0, $s1, $w0
+
+    ; CHECK-LABEL: name: select_f32
+    ; CHECK: liveins: $s0, $s1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $s0 = COPY [[SELECT]](s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s32) = COPY $s0
+    %2:_(s32) = COPY $s1
+    %4:_(s32) = G_SELECT %0(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            select_f64
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+
+    ; CHECK-LABEL: name: select_f64
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $d1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
new file mode 100644
index 0000000..e3acf29
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            select_f32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $s0, $s1, $w0
+
+    ; CHECK-LABEL: name: select_f32
+    ; CHECK: liveins: $s0, $s1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+    ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %3:gpr(s32) = COPY $w0
+    %0:gpr(s1) = G_TRUNC %3(s32)
+    %1:fpr(s32) = COPY $s0
+    %2:fpr(s32) = COPY $s1
+    %5:fpr(s1) = COPY %0(s1)
+    %4:fpr(s32) = G_SELECT %5(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            select_f64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+
+    ; CHECK-LABEL: name: select_f64
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+    ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %3:gpr(s32) = COPY $w0
+    %0:gpr(s1) = G_TRUNC %3(s32)
+    %1:fpr(s64) = COPY $d0
+    %2:fpr(s64) = COPY $d1
+    %5:fpr(s1) = COPY %0(s1)
+    %4:fpr(s64) = G_SELECT %5(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index e06f74c..30ad4b8 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -46,6 +47,9 @@ define float @select_f32(i1 zeroext %c, float %a, float %b) {
 ; CHECK-LABEL: select_f32
 ; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
 ; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32
+; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
   %1 = select i1 %c, float %a, float %b
   ret float %1
 }
@@ -54,6 +58,9 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) {
 ; CHECK-LABEL: select_f64
 ; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
 ; CHECK-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64
+; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
   %1 = select i1 %c, double %a, double %b
   ret double %1
 }
-- 
2.7.4