}
}
+/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
+/// Helper function for functions like createDTuple and createQTuple.
+///
+/// \p RegClassIDs - The list of register class IDs for the tuples of some
+/// scalar class, e.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
+/// expected to contain the classes for 2-, 3-, and 4-register tuples, in
+/// that order.
+///
+/// \p SubRegs - The list of subregister indices to assign to each register in
+/// the tuple, e.g. qsub0 for the first register of a Q tuple, qsub1 for the
+/// second, and so on. SubRegs[I] is the subregister index used for Regs[I].
+///
+/// \returns Either the destination register of the REG_SEQUENCE instruction
+/// that was created, or the sole element of \p Regs if \p Regs contains only
+/// one register.
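+///
+/// A typical use, mirroring the TBL2 selection code below:
+/// \code
+///   SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
+///   Register Tuple = createQTuple(Regs, MIB);
+/// \endcode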
+static Register createTuple(ArrayRef<Register> Regs,
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[], MachineIRBuilder &MIB) {
+ unsigned NumRegs = Regs.size();
+ if (NumRegs == 1)
+ return Regs[0];
+ assert(NumRegs >= 2 && NumRegs <= 4 &&
+ "Only support between 2 and 4 registers in a tuple!");
+ const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
+ auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
+ auto RegSequence =
+ MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
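+ // REG_SEQUENCE operands are (register, subregister index) pairs, one pair
+ // per input register.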
+ for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
+ RegSequence.addUse(Regs[I]);
+ RegSequence.addImm(SubRegs[I]);
+ }
+ return RegSequence.getReg(0);
+}
+
+/// Create a tuple of D-registers using the registers in \p Regs.
+static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+ static const unsigned RegClassIDs[] = {
+ AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
+ return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
+/// Create a tuple of Q-registers using the registers in \p Regs.
+static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+ static const unsigned RegClassIDs[] = {
+ AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
+ return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
auto &MI = *Root.getParent();
auto &MBB = *MI.getParent();
// For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
// Q registers for regalloc.
- auto RegSeq = MIB.buildInstr(TargetOpcode::REG_SEQUENCE,
- {&AArch64::QQRegClass}, {Src1Reg})
- .addImm(AArch64::qsub0)
- .addUse(Src2Reg)
- .addImm(AArch64::qsub1);
-
+ SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
+ auto RegSeq = createQTuple(Regs, MIB);
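+ // RegSeq now holds the destination of a REG_SEQUENCE forming a QQ tuple,
+ // e.g. %seq:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1.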
auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
{RegSeq, IndexLoad->getOperand(0)});
- constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
I.eraseFromParent();
return true;
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_st2: {
+ Register Src1 = I.getOperand(1).getReg();
+ Register Src2 = I.getOperand(2).getReg();
+ Register Ptr = I.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Src1);
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
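+ // Pick the ST2 variant matching the element type. Plain s64/p0 operands
+ // are treated as one-element vectors; NEON ST2 has no .1d arrangement, so
+ // those use the two-register ST1 (ST1Twov1d) instead.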
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for st2!");
+ SmallVector<Register, 2> Regs = {Src1, Src2};
+ Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+ : createDTuple(Regs, MIB);
+ auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+ Store.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+ break;
+ }
}
I.eraseFromParent();
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: v8i8_ST2Twov8b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $x0
+
+ ; CHECK-LABEL: name: v8i8_ST2Twov8b
+ ; CHECK: liveins: $d0, $d1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr64 = COPY $d0
+ ; CHECK: %src2:fpr64 = COPY $d1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+ ; CHECK: ST2Twov8b [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<8 x s8>) = COPY $d0
+ %src2:fpr(<8 x s8>) = COPY $d1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s8>), %src2(<8 x s8>), %ptr(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v16i8_ST2Twov16b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+
+ ; CHECK-LABEL: name: v16i8_ST2Twov16b
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr128 = COPY $q0
+ ; CHECK: %src2:fpr128 = COPY $q1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+ ; CHECK: ST2Twov16b [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<16 x s8>) = COPY $q0
+ %src2:fpr(<16 x s8>) = COPY $q1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<16 x s8>), %src2(<16 x s8>), %ptr(p0) :: (store (<4 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v4i16_ST2Twov4h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $x0
+
+ ; CHECK-LABEL: name: v4i16_ST2Twov4h
+ ; CHECK: liveins: $d0, $d1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr64 = COPY $d0
+ ; CHECK: %src2:fpr64 = COPY $d1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+ ; CHECK: ST2Twov4h [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<4 x s16>) = COPY $d0
+ %src2:fpr(<4 x s16>) = COPY $d1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s16>), %src2(<4 x s16>), %ptr(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v8i16_ST2Twov8h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+
+ ; CHECK-LABEL: name: v8i16_ST2Twov8h
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr128 = COPY $q0
+ ; CHECK: %src2:fpr128 = COPY $q1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+ ; CHECK: ST2Twov8h [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<8 x s16>) = COPY $q0
+ %src2:fpr(<8 x s16>) = COPY $q1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s16>), %src2(<8 x s16>), %ptr(p0) :: (store (<4 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v2i32_ST2Twov2s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $x0
+
+ ; CHECK-LABEL: name: v2i32_ST2Twov2s
+ ; CHECK: liveins: $d0, $d1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr64 = COPY $d0
+ ; CHECK: %src2:fpr64 = COPY $d1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+ ; CHECK: ST2Twov2s [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<2 x s32>) = COPY $d0
+ %src2:fpr(<2 x s32>) = COPY $d1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s32>), %src2(<2 x s32>), %ptr(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v4i32_ST2Twov4s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+
+ ; CHECK-LABEL: name: v4i32_ST2Twov4s
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr128 = COPY $q0
+ ; CHECK: %src2:fpr128 = COPY $q1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+ ; CHECK: ST2Twov4s [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<4 x s32>) = COPY $q0
+ %src2:fpr(<4 x s32>) = COPY $q1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s32>), %src2(<4 x s32>), %ptr(p0) :: (store (<4 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v2i64_ST2Twov2d_s64_elts
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+
+ ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_elts
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr128 = COPY $q0
+ ; CHECK: %src2:fpr128 = COPY $q1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+ ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<2 x s64>) = COPY $q0
+ %src2:fpr(<2 x s64>) = COPY $q1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s64>), %src2(<2 x s64>), %ptr(p0) :: (store (<4 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v2i64_ST2Twov2d_s64_p0_elts
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+
+ ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_p0_elts
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:fpr128 = COPY $q0
+ ; CHECK: %src2:fpr128 = COPY $q1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+ ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:fpr(<2 x p0>) = COPY $q0
+ %src2:fpr(<2 x p0>) = COPY $q1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x p0>), %src2(<2 x p0>), %ptr(p0) :: (store (<4 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v1i64_ST1Twov1d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: v1i64_ST1Twov1d_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:gpr64all = COPY $x0
+ ; CHECK: %src2:gpr64all = COPY $x1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+ ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:gpr(s64) = COPY $x0
+ %src2:gpr(s64) = COPY $x1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(s64), %src2(s64), %ptr(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v1i64_ST1Twov1d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: v1i64_ST1Twov1d_p0
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %src1:gpr64all = COPY $x0
+ ; CHECK: %src2:gpr64all = COPY $x1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+ ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+ ; CHECK: RET_ReallyLR
+ %ptr:gpr(p0) = COPY $x0
+ %src1:gpr(p0) = COPY $x0
+ %src2:gpr(p0) = COPY $x1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(p0), %src2(p0), %ptr(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...