}
}
+/// Expand source vectors to the size of destination vector. Pads both sources of the shuffle \p MI with undef, rewrites the mask to match, emits the replacement through \p MIRBuilder and erases \p MI; returns UnableToLegalize without changing anything when the mask is not longer than the source vectors.
+static LegalizerHelper::LegalizeResult
+equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Operand 0 is the destination, operands 1 and 2 are the two sources and operand 3 carries the shuffle mask.
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ unsigned MaskNumElts = Mask.size();
+ unsigned SrcNumElts = SrcTy.getNumElements();
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DestEltTy = DstTy.getElementType();
+
+ // TODO: Normalize the shuffle vector since mask and vector length don't
+ // match. Until then only masks longer than the sources are handled here.
+ if (MaskNumElts <= SrcNumElts) {
+ return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ }
+ // Round the mask length up to a multiple of the source length so each padded source is NumConcat source-sized pieces.
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
+
+ // Create new source vectors by concatenating the initial source vectors
+ // (slot 0) with undefined vectors of the same size (one shared undef fills every other slot).
+ auto Undef = MIRBuilder.buildUndef(SrcTy);
+ SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
+ SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
+ MOps1[0] = MI.getOperand(1).getReg();
+ MOps2[0] = MI.getOperand(2).getReg();
+
+ auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
+ auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
+ // Readjust mask for new input vector length. Entries at or above SrcNumElts are shifted up by the padding added to the
+ // first source so they still refer to the same element of the second source; smaller (including negative) entries are kept and the extra tail entries stay -1.
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
+ for (unsigned I = 0; I != MaskNumElts; ++I) {
+ int Idx = Mask[I];
+ if (Idx >= static_cast<int>(SrcNumElts))
+ Idx += PaddedMaskNumElts - SrcNumElts;
+ MappedOps[I] = Idx;
+ }
+ // If we got more elements than required, extract subvector: shuffle into a PaddedTy temporary, then
+ // rebuild DstReg from its first MaskNumElts elements, extracted one at a time by constant index.
+ if (MaskNumElts != PaddedMaskNumElts) {
+ auto Shuffle =
+ MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
+
+ SmallVector<Register, 16> Elts(MaskNumElts);
+ for (unsigned I = 0; I < MaskNumElts; ++I) {
+ Elts[I] =
+ MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
+ .getReg(0);
+ }
+ MIRBuilder.buildBuildVector(DstReg, Elts);
+ } else { // Padded length equals the mask length: shuffle straight into DstReg.
+ MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
+ }
+ // The replacement has been emitted, so drop the original instruction.
+ MI.eraseFromParent();
+ return LegalizerHelper::LegalizeResult::Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned int TypeIdx, LLT MoreTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
Register DstReg = MI.getOperand(0).getReg();
Register Src1Reg = MI.getOperand(1).getReg();
Register Src2Reg = MI.getOperand(2).getReg();
unsigned NumElts = DstTy.getNumElements();
unsigned WidenNumElts = MoreTy.getNumElements();
+ if (DstTy.isVector() && Src1Ty.isVector() &&
+ DstTy.getNumElements() > Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ }
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
// Expect a canonicalized shuffle.
if (DstTy != Src1Ty || DstTy != Src2Ty)
return UnableToLegalize;
RET_ReallyLR
...
+---
+name: shuffle_v4i32_v1i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2
+ ; Mask has 4 elements but both sources are the same <3 x s32> vector, so 4 is not a multiple of the source length.
+ ; CHECK-LABEL: name: shuffle_v4i32_v1i32
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(0, 1, 5, 6)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(<3 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32)
+ %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %3(<3 x s32>), %3, shufflemask(0, 1, 4, 5)
+ $q0 = COPY %4(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: shuffle_v4i32_v2i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $d1
+ ; <4 x s32> shuffle of two <2 x s32> sources: each source is padded with undef and second-source indices 2, 3 become 4, 5.
+ ; CHECK-LABEL: name: shuffle_v4i32_v2i32
+ ; CHECK: liveins: $q0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<4 x s32>), [[CONCAT_VECTORS1]], shufflemask(0, 1, 4, 5)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 1, 2, 3)
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: shuffle_v8i16_v4i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+ ; <8 x s16> shuffle of two <4 x s16> sources with a reversed mask: second-source indices 7..4 become 11..8 after padding.
+ ; CHECK-LABEL: name: shuffle_v8i16_v4i16
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY1]](<4 x s16>), [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s16>), [[CONCAT_VECTORS1]], shufflemask(11, 10, 9, 8, 3, 2, 1, 0)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<8 x s16>) = G_SHUFFLE_VECTOR %0(<4 x s16>), %1, shufflemask(7, 6, 5, 4, 3, 2, 1, 0)
+ $q0 = COPY %2(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: shuffle_v16i8_v8i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+ ; <16 x s8> shuffle of two <8 x s8> sources: only the second-source indices 13 and 15 are remapped (to 21 and 23).
+ ; CHECK-LABEL: name: shuffle_v16i8_v8i8
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[COPY]](<8 x s8>), [[DEF]](<8 x s8>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[COPY1]](<8 x s8>), [[DEF]](<8 x s8>)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(7, 21, 6, 4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s8>) = COPY $d0
+ %1:_(<8 x s8>) = COPY $d1
+ %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(7, 13, 6, 4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0)
+ $q0 = COPY %2(<16 x s8>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: size_shuffle_v6i32_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $x0
+ ; <6 x s32> shuffle of two <4 x s32> sources feeding a store; expected output is two <4 x s32> shuffles and a split <4 x s32> / <2 x s32> store.
+ ; CHECK-LABEL: name: size_shuffle_v6i32_v4i32
+ ; CHECK: liveins: $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $s3
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $s4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $s5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $s6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $s7
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(3, 4, 7, 0)
+ ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(1, 5, undef, undef)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC4]](s32), [[EVEC5]](s32)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16)
+ ; CHECK-NEXT: RET_ReallyLR
+ %3:_(s32) = COPY $s0
+ %4:_(s32) = COPY $s1
+ %5:_(s32) = COPY $s2
+ %6:_(s32) = COPY $s3
+ %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32)
+ %7:_(s32) = COPY $s4
+ %8:_(s32) = COPY $s5
+ %9:_(s32) = COPY $s6
+ %10:_(s32) = COPY $s7
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32), %10(s32)
+ %2:_(p0) = COPY $x0
+ %19:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(3, 4, 7, 0, 1, 5)
+ G_STORE %19(<6 x s32>), %2(p0) :: (store (<6 x s32>), align 32)
+ RET_ReallyLR
+
+...