// difficult because at RBS we may end up pessimizing the fpr case if we
// decided to add an anyextend to fix this. Manual selection is the most
// robust solution for now.
- Register SrcReg = I.getOperand(1).getReg();
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
+ AArch64::GPRRegBankID)
return false; // We expect the fpr regbank case to be imported.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.getSizeInBits() == 16)
- I.setDesc(TII.get(AArch64::DUPv8i16gpr));
- else if (SrcTy.getSizeInBits() == 8)
+ LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+ if (VecTy == LLT::vector(8, 8))
+ I.setDesc(TII.get(AArch64::DUPv8i8gpr));
+ else if (VecTy == LLT::vector(16, 8))
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else if (VecTy == LLT::vector(4, 16))
+ I.setDesc(TII.get(AArch64::DUPv4i16gpr));
+ else if (VecTy == LLT::vector(8, 16))
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
else
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
...
---
+name: DUPv4s16gpr_s16_src
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $w0
+ ; The G_DUP below takes an s16 source on the gpr bank (a G_TRUNC of the
+ ; 32-bit copy of $w0) and produces a <4 x s16> value on the fpr bank.
+ ; The expected output is a single DUPv4i16gpr that reads the gpr32 copy
+ ; directly, with no instruction remaining for the truncate.
+ ; CHECK-LABEL: name: DUPv4s16gpr_s16_src
+ ; CHECK: liveins: $w0
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
+ ; CHECK: $d0 = COPY %dup
+ ; CHECK: RET_ReallyLR implicit $d0
+ %copy:gpr(s32) = COPY $w0
+ %trunc:gpr(s16) = G_TRUNC %copy
+ %dup:fpr(<4 x s16>) = G_DUP %trunc(s16)
+ $d0 = COPY %dup(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
name: DUPv8i8gpr
alignment: 4
legalized: true
...
---
+name: DUPv8i8gpr_s8_src
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $w0
+ ; The G_DUP below takes an s8 source on the gpr bank (a G_TRUNC of the
+ ; 32-bit copy of $w0) and produces a <8 x s8> value on the fpr bank.
+ ; The expected output is a single DUPv8i8gpr that reads the gpr32 copy
+ ; directly, with no instruction remaining for the truncate.
+ ; CHECK-LABEL: name: DUPv8i8gpr_s8_src
+ ; CHECK: liveins: $w0
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
+ ; CHECK: $d0 = COPY %dup
+ ; CHECK: RET_ReallyLR implicit $d0
+ %copy:gpr(s32) = COPY $w0
+ %trunc:gpr(s8) = G_TRUNC %copy(s32)
+ %dup:fpr(<8 x s8>) = G_DUP %trunc(s8)
+ $d0 = COPY %dup(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
name: DUPv16i8gpr
alignment: 4
legalized: true