From 68a09c929003bf6af41162ed9e6dc4713d96a997 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Thu, 22 Jun 2023 12:46:54 +0100
Subject: [PATCH] [AArch64] Remove G_VECREDUCE_FADD from selectReduction

I believe that for fp reductions we can use the imported TableGen patterns for
selection, as opposed to going via selectReduction. This adds a GINodeEquiv
mapping G_VECREDUCE_FADD to vecreduce_fadd and removes the manual selection
code. Integer reductions are more difficult, as their return types are promoted
to i32 in SelectionDAG.

Differential Revision: https://reviews.llvm.org/D153244
---
 .../llvm/Target/GlobalISel/SelectionDAGCompat.td       |  1 +
 .../AArch64/GISel/AArch64InstructionSelector.cpp       | 14 --------------
 .../CodeGen/AArch64/GlobalISel/select-reduce-fadd.mir  | 18 ++++++++++--------
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index cf78ac7..41a9539 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -151,6 +151,7 @@ def : GINodeEquiv<G_ROTR, rotr>;
 def : GINodeEquiv<G_ROTL, rotl>;
 def : GINodeEquiv<G_LROUND, lround>;
 def : GINodeEquiv<G_LLROUND, llround>;
+def : GINodeEquiv<G_VECREDUCE_FADD, vecreduce_fadd>;
 
 def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
 def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index b18e7f7..29c9979 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3509,7 +3509,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     return selectConcatVectors(I, MRI);
   case TargetOpcode::G_JUMP_TABLE:
     return selectJumpTable(I, MRI);
-  case TargetOpcode::G_VECREDUCE_FADD:
   case TargetOpcode::G_VECREDUCE_ADD:
     return selectReduction(I, MRI);
   case TargetOpcode::G_MEMCPY:
@@ -3559,19 +3558,6 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
-  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
-    unsigned Opc = 0;
-    if (VecTy == LLT::fixed_vector(2, 32))
-      Opc = AArch64::FADDPv2i32p;
-    else if (VecTy == LLT::fixed_vector(2, 64))
-      Opc = AArch64::FADDPv2i64p;
-    else {
-      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
-      return false;
-    }
-    I.setDesc(TII.get(Opc));
-    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-  }
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-fadd.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-fadd.mir
index db1cee2..85d9e1c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-fadd.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-fadd.mir
@@ -11,10 +11,11 @@ body:             |
 
     ; CHECK-LABEL: name: fadd_v2s32
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
-    ; CHECK: $w0 = COPY [[FADDPv2i32p]]
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nofpexcept FADDPv2i32p [[COPY]], implicit $fpcr
+    ; CHECK-NEXT: $w0 = COPY [[FADDPv2i32p]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(s32) = G_VECREDUCE_FADD %0(<2 x s32>)
     $w0 = COPY %1(s32)
@@ -32,10 +33,11 @@ body:             |
 
     ; CHECK-LABEL: name: fadd_v2s64
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
-    ; CHECK: $x0 = COPY [[FADDPv2i64p]]
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[FADDPv2i64p:%[0-9]+]]:fpr64 = nofpexcept FADDPv2i64p [[COPY]], implicit $fpcr
+    ; CHECK-NEXT: $x0 = COPY [[FADDPv2i64p]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:fpr(<2 x s64>) = COPY $q0
     %2:fpr(s64) = G_VECREDUCE_FADD %0(<2 x s64>)
     $x0 = COPY %2(s64)
-- 
2.7.4