[GlobalISel][AArch64] Improve register bank mappings for G_SELECT

author Jessica Paquette <jpaquette@apple.com>

Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)

committer Jessica Paquette <jpaquette@apple.com>

Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)
author Jessica Paquette <jpaquette@apple.com>
Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)
committer Jessica Paquette <jpaquette@apple.com>
Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp

index 7fdcde44e514a5d455e150ab8128fd0c9cd53ff0..699343614cc42ad36473145e1e3c499b96c01cca 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -679,15 +679,58 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
      // If the destination is FPR, preserve that.
      if (OpRegBankIdx[0] != PMI_FirstGPR)
        break;
+
+    // If we're taking in vectors, we have no choice but to put everything on
+    // FPRs.
      LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
-    if (SrcTy.isVector() ||
-        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
-               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
-      // Set the register bank of every operand to FPR.
-      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
-           Idx < NumOperands; ++Idx)
+    if (SrcTy.isVector()) {
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
          OpRegBankIdx[Idx] = PMI_FirstFPR;
+      break;
+    }
+
+    // Try to minimize the number of copies. If we have more floating point
+    // constrained values than not, then we'll put everything on FPR. Otherwise,
+    // everything has to be on GPR.
+    unsigned NumFP = 0;
+
+    // Check if the uses of the result always produce floating point values.
+    //
+    // For example:
+    //
+    // %z = G_SELECT %cond %x %y
+    // fpr = G_FOO %z ...
+    if (any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return HasFPConstraints(MI); }))
+      ++NumFP;
+
+    // Check if the defs of the source values always produce floating point
+    // values.
+    //
+    // For example:
+    //
+    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+    // %z = G_SELECT %cond %x %y
+    //
+    // Also check whether or not the sources have already been decided to be
+    // FPR. Keep track of this.
+    //
+    // This doesn't check the condition, since it's just whatever is in NZCV.
+    // This isn't passed explicitly in a register to fcsel/csel.
+    for (unsigned Idx = 2; Idx < 4; ++Idx) {
+      unsigned VReg = MI.getOperand(Idx).getReg();
+      MachineInstr *DefMI = MRI.getVRegDef(VReg);
+      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
+          HasFPConstraints(*DefMI))
+        ++NumFP;
      }
+
+    // If we have more FP constraints than not, then move everything over to
+    // FPR.
+    if (NumFP >= 2)
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+
      break;
    }
    case TargetOpcode::G_UNMERGE_VALUES: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir

index 97b543442bd8db69be82e6d509e41f3317b0700a..99c69160e67a30354d215407f55ac6a3f953b45c 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
@@ -58,3 +58,133 @@ body:             |
      %4:_(s64) = G_SELECT %0(s1), %1, %2
      $d0 = COPY %4(s64)
      RET_ReallyLR implicit $d0
+
+...
+---
+name:            two_fpr_inputs_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+    ; CHECK-LABEL: name: two_fpr_inputs_gpr_output
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Verify that the G_SELECT only has FPRs.
+    ; The only difference between fcsel and csel is the register banks. So,
+    ; if we have two FPR inputs and a GPR output, we should do a floating point
+    ; select anyway. This will cost one copy for the output, but that's less
+    ; than doing two to put the inputs on GPRs.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $d1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            one_fpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_fpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr(s64) = COPY [[COPY2]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same idea as the above test. If the output is an FPR, and one of the
+    ; inputs is an FPR, then it's fewer copies to just do a FCSEL.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            one_fpr_input_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_gpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr(s64) = COPY [[COPY1]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY3]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Now we have more GPR registers on the G_SELECT. It's cheaper here to put
+    ; everything on GPR.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            two_gpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $w0
+    ; CHECK-LABEL: name: two_gpr_input_fpr_output
+    ; CHECK: liveins: $x0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same as above. The G_SELECT should get all GPRs.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $x0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
author	Jessica Paquette <jpaquette@apple.com>
	Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)
committer	Jessica Paquette <jpaquette@apple.com>
	Fri, 24 May 2019 19:35:25 +0000 (19:35 +0000)
llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir		patch | blob | history