[AArch64][GlobalISel] Add selection support for G_VECREDUCE of <2 x i32>
author: Amara Emerson <amara@apple.com>
Sat, 20 Feb 2021 08:38:17 +0000 (00:38 -0800)
committer: Amara Emerson <amara@apple.com>
Sat, 20 Feb 2021 08:39:38 +0000 (00:39 -0800)
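This selects G_VECREDUCE_ADD of <2 x i32> to a pairwise add (ADDPv2i32), which produces an FPR64 value, followed by a subregister (ssub) copy to extract the 32-bit result.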

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-add.mir

index bf44fa7..e34d185 100644 (file)
@@ -3239,6 +3239,21 @@ bool AArch64InstructionSelector::selectReduction(
   Register VecReg = I.getOperand(1).getReg();
   LLT VecTy = MRI.getType(VecReg);
   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+    // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
+    // a subregister copy afterwards.
+    if (VecTy == LLT::vector(2, 32)) {
+      MachineIRBuilder MIB(I);
+      Register DstReg = I.getOperand(0).getReg();
+      auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
+                                 {VecReg, VecReg});
+      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+                      .addReg(AddP.getReg(0), 0, AArch64::ssub)
+                      .getReg(0);
+      RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
+      I.eraseFromParent();
+      return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
+    }
+
     unsigned Opc = 0;
     if (VecTy == LLT::vector(16, 8))
       Opc = AArch64::ADDVv16i8v;
index 30d8893..afd5aa7 100644 (file)
@@ -87,6 +87,33 @@ body:             |
 
 ...
 ---
+name:            add_S_v2i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: add_S_v2i32
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8)
+    ; CHECK: [[ADDPv2i32_:%[0-9]+]]:fpr64 = ADDPv2i32 [[LDRDui]], [[LDRDui]]
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[ADDPv2i32_]].ssub
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(p0) = COPY $x0
+    %1:fpr(<2 x s32>) = G_LOAD %0(p0) :: (load 8)
+    %2:fpr(s32) = G_VECREDUCE_ADD %1(<2 x s32>)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
 name:            add_D
 alignment:       4
 legalized:       true