(apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
>;
+// Matchdata for form_duplane: the G_DUPLANE* opcode to build and the lane
+// index to duplicate, filled in by matchDupLane.
+def form_duplane_matchdata :
+  GIDefMatchData<"std::pair<unsigned, int>">;
+// Lower a G_SHUFFLE_VECTOR whose mask splats a single lane of its first
+// source into the matching G_DUPLANE[8|16|32|64] pseudo-instruction.
+def form_duplane : GICombineRule <
+  (defs root:$root, form_duplane_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+          [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
def adjust_icmp_imm_matchdata :
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
def adjust_icmp_imm : GICombineRule <
def AArch64PostLegalizerLoweringHelper
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
[shuffle_vector_pseudos, vashr_vlshr_imm,
- icmp_lowering]> {
+ icmp_lowering, form_duplane]> {
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}
let InOperandList = (ins type1:$lane);
let hasSideEffects = 0;
}
+
+// Represents a lane duplicate operation: every element of the destination
+// vector is a copy of element $lane of the $src vector. One pseudo per
+// element size, mirroring the AArch64duplane* SelectionDAG nodes.
+// 8-bit elements.
+def G_DUPLANE8 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+// 16-bit elements.
+def G_DUPLANE16 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+// 32-bit elements.
+def G_DUPLANE32 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+// 64-bit elements.
+def G_DUPLANE64 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+
// Represents a trn1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
def G_TRN1 : AArch64GenericInstruction {
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
def : GINodeEquiv<G_DUP, AArch64dup>;
+// Equate the G_DUPLANE* pseudos with the corresponding AArch64duplane*
+// SelectionDAG nodes so the imported DUP lane patterns can select them.
+def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
+def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
+def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
+def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy,
- ArrayRef<int> Mask, MachineRegisterInfo &MRI) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
return nullptr;
}
-bool AArch64InstructionSelector::tryOptShuffleDupLane(
- MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask,
- MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
-
- // We assume that scalar->vector splats have been been handled in the
- // post-legalizer combiner to G_DUP. However splats of a source vector's
- // lane don't fit that pattern, detect it here:
- // %res = G_SHUFFLE_VECTOR %src:<n x ty>, undef, <n x i32> splat(lane-idx)
- // =>
- // %res = DUPv[N][Ty]lane %src, lane-idx
- // FIXME: this case should be covered by re-implementing the perfect shuffle
- // codegen mechanism.
-
- auto LaneIdx = getSplatIndex(I);
- if (!LaneIdx)
- return false;
-
- // The lane idx should be within the first source vector.
- if (*LaneIdx >= SrcTy.getNumElements())
- return false;
-
- if (DstTy != SrcTy)
- return false;
-
- LLT ScalarTy = SrcTy.getElementType();
- unsigned ScalarSize = ScalarTy.getSizeInBits();
-
- unsigned Opc = 0;
- switch (SrcTy.getNumElements()) {
- case 2:
- if (ScalarSize == 64)
- Opc = AArch64::DUPv2i64lane;
- break;
- case 4:
- if (ScalarSize == 32)
- Opc = AArch64::DUPv4i32lane;
- break;
- case 8:
- if (ScalarSize == 16)
- Opc = AArch64::DUPv8i16lane;
- break;
- case 16:
- if (ScalarSize == 8)
- Opc = AArch64::DUPv16i8lane;
- break;
- default:
- break;
- }
- if (!Opc)
- return false;
-
- MachineIRBuilder MIB(I);
- auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()},
- {I.getOperand(1).getReg()})
- .addImm(*LaneIdx);
- constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
-}
-
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
return false;
}
- if (tryOptShuffleDupLane(I, DstTy, Src1Ty, Mask, MRI))
- return true;
-
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
return true;
}
+/// Match a G_SHUFFLE_VECTOR whose mask splats one lane of its first source
+/// vector across an identically-typed destination.
+///
+/// On success, \p MatchInfo receives the G_DUPLANE* opcode to emit and the
+/// lane index to duplicate.
+bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+                  std::pair<unsigned, int> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register SrcReg = MI.getOperand(1).getReg();
+  const LLT SrcTy = MRI.getType(SrcReg);
+  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // Only masks that broadcast a single element are of interest.
+  auto MaybeLane = getSplatIndex(MI);
+  if (!MaybeLane)
+    return false;
+  int Lane = *MaybeLane;
+
+  // The duplicated lane must come from the first source vector.
+  if (Lane >= SrcTy.getNumElements())
+    return false;
+
+  // DUPLANE replicates a source lane into a same-shaped result.
+  if (DstTy != SrcTy)
+    return false;
+
+  const unsigned NumElts = SrcTy.getNumElements();
+  const unsigned EltSize = SrcTy.getElementType().getSizeInBits();
+
+  // Pick the pseudo matching the (element count, element size) pair;
+  // any other combination is left for generic shuffle lowering.
+  unsigned DupOpc = 0;
+  if (NumElts == 2 && EltSize == 64)
+    DupOpc = AArch64::G_DUPLANE64;
+  else if (NumElts == 4 && EltSize == 32)
+    DupOpc = AArch64::G_DUPLANE32;
+  else if (NumElts == 8 && EltSize == 16)
+    DupOpc = AArch64::G_DUPLANE16;
+  else if (NumElts == 16 && EltSize == 8)
+    DupOpc = AArch64::G_DUPLANE8;
+  if (!DupOpc)
+    return false;
+
+  MatchInfo = std::make_pair(DupOpc, Lane);
+  return true;
+}
+
+/// Replace \p MI (a splat G_SHUFFLE_VECTOR accepted by matchDupLane) with the
+/// G_DUPLANE* pseudo recorded in \p MatchInfo.first, taking the original
+/// first source as input and \p MatchInfo.second as the lane index
+/// (materialized as an s64 G_CONSTANT operand).
+bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  // Insert the replacement at MI's position with MI's debug location.
+  B.setInstrAndDebugLoc(MI);
+  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
+  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
+               {MI.getOperand(1).getReg(), Lane});
+  MI.eraseFromParent();
+  return true;
+}
+
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -global-isel -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECTED
+
+---
+# Splat of lane 0 of a <2 x s64>: lowered to G_DUPLANE64, then selected to
+# DUPv2i64lane.
+name: duplane64
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane64
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE64_:%[0-9]+]]:_(<2 x s64>) = G_DUPLANE64 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE64_]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane64
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv2i64lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+# Splat of lane 0 of a <4 x s32>: lowered to G_DUPLANE32, then selected to
+# DUPv4i32lane.
+name: duplane32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane32
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE32_:%[0-9]+]]:_(<4 x s32>) = G_DUPLANE32 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE32_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane32
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv4i32lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+# Splat of lane 0 of an <8 x s16>: lowered to G_DUPLANE16, then selected to
+# DUPv8i16lane.
+name: duplane16
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane16
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE16_:%[0-9]+]]:_(<8 x s16>) = G_DUPLANE16 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE16_]](<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane16
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv8i16lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<8 x s16>) = COPY $q0
+    %2:_(<8 x s16>) = G_IMPLICIT_DEF
+    %4:_(<8 x s16>) = G_SHUFFLE_VECTOR %1(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $q0 = COPY %4(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+# Splat of lane 0 of a <16 x s8>: lowered to G_DUPLANE8, then selected to
+# DUPv16i8lane.
+name: duplane8
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane8
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE8_:%[0-9]+]]:_(<16 x s8>) = G_DUPLANE8 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE8_]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane8
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv16i8lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<16 x s8>) = COPY $q0
+    %2:_(<16 x s8>) = G_IMPLICIT_DEF
+    %4:_(<16 x s8>) = G_SHUFFLE_VECTOR %1(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+    $q0 = COPY %4(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
-...
----
-name: duplane_v16i8
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
- - { reg: '$q0' }
-body: |
- bb.1:
- liveins: $q0
-
- ; CHECK-LABEL: name: duplane_v16i8
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0
- ; CHECK: $q0 = COPY [[DUPv16i8lane]]
- ; CHECK: RET_ReallyLR implicit $q0
- %0:fpr(<16 x s8>) = COPY $q0
- %2:fpr(<16 x s8>) = G_IMPLICIT_DEF
- %1:fpr(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
- $q0 = COPY %1(<16 x s8>)
- RET_ReallyLR implicit $q0
-
-...
----
-name: duplane_v8i16
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
- - { reg: '$q0' }
-body: |
- bb.1:
- liveins: $q0
-
- ; CHECK-LABEL: name: duplane_v8i16
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0
- ; CHECK: $q0 = COPY [[DUPv8i16lane]]
- ; CHECK: RET_ReallyLR implicit $q0
- %0:fpr(<8 x s16>) = COPY $q0
- %2:fpr(<8 x s16>) = G_IMPLICIT_DEF
- %1:fpr(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
- $q0 = COPY %1(<8 x s16>)
- RET_ReallyLR implicit $q0
-
-...
----
-name: duplane_v4f32
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
- - { reg: '$q0' }
-body: |
- bb.1:
- liveins: $q0
-
- ; CHECK-LABEL: name: duplane_v4f32
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0
- ; CHECK: $q0 = COPY [[DUPv4i32lane]]
- ; CHECK: RET_ReallyLR implicit $q0
- %0:fpr(<4 x s32>) = COPY $q0
- %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
- %1:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
- $q0 = COPY %1(<4 x s32>)
- RET_ReallyLR implicit $q0
-
-...
----
-name: duplane_v2i64
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
- - { reg: '$q0' }
-body: |
- bb.1:
- liveins: $q0
-
- ; CHECK-LABEL: name: duplane_v2i64
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0
- ; CHECK: $q0 = COPY [[DUPv2i64lane]]
- ; CHECK: RET_ReallyLR implicit $q0
- %0:fpr(<2 x s64>) = COPY $q0
- %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
- %1:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, 0)
- $q0 = COPY %1(<2 x s64>)
- RET_ReallyLR implicit $q0
-
-...