Register VecReg = I.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
+ // a subregister copy afterwards.
+ if (VecTy == LLT::vector(2, 32)) {
+ MachineIRBuilder MIB(I);
+ Register DstReg = I.getOperand(0).getReg();
+ auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
+ {VecReg, VecReg});
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(AddP.getReg(0), 0, AArch64::ssub)
+ .getReg(0);
+ RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
+ }
+
unsigned Opc = 0;
if (VecTy == LLT::vector(16, 8))
Opc = AArch64::ADDVv16i8v;
...
---
+# Instruction-selection test for G_VECREDUCE_ADD with a <2 x s32> source and
+# an s32 result. The vector is loaded from the pointer passed in $x0. The
+# expected output is an ADDPv2i32 of the loaded value with itself, defining an
+# fpr64 register, followed by a COPY of its ssub subregister into an fpr32
+# register, which is then copied to $w0 and returned.
+name: add_S_v2i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: add_S_v2i32
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8)
+ ; CHECK: [[ADDPv2i32_:%[0-9]+]]:fpr64 = ADDPv2i32 [[LDRDui]], [[LDRDui]]
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[ADDPv2i32_]].ssub
+ ; CHECK: $w0 = COPY [[COPY1]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:gpr(p0) = COPY $x0
+ %1:fpr(<2 x s32>) = G_LOAD %0(p0) :: (load 8)
+ %2:fpr(s32) = G_VECREDUCE_ADD %1(<2 x s32>)
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
name: add_D
alignment: 4
legalized: true