The lowering of vector selects needs to first splat the scalar mask into a
vector.
This was causing a crash when building oggenc in the test suite.
Differential Revision: https://reviews.llvm.org/D91655
  MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
                                            ArrayRef<Register> Ops);
+  /// Build and insert a vector splat of a scalar \p Src using a
+  /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Src must have the same type as the element type of \p Res.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src);
+
+  /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1,
+                                         const SrcOp &Src2, ArrayRef<int> Mask);
+
  /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
  ///
  /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
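
For illustration, a minimal sketch of how a client might call the new splat
helper; this snippet is not part of the patch, and B and splatScalar are
hypothetical names. buildShuffleSplat emits a G_IMPLICIT_DEF, a
G_INSERT_VECTOR_ELT into lane 0, and a G_SHUFFLE_VECTOR whose all-zero mask
reads lane 0 into every result lane:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/Support/LowLevelTypeImpl.h"

  using namespace llvm;

  // Hypothetical caller: broadcast an s32 value into every lane of a
  // <4 x s32> vector. Per the \pre above, Scalar's type must match the
  // destination's element type.
  static Register splatScalar(MachineIRBuilder &B, Register Scalar) {
    LLT V4S32 = LLT::vector(4, 32);
    return B.buildShuffleSplat(V4S32, Scalar).getReg(0);
  }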
  if (!DstTy.isVector())
    return UnableToLegalize;

-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+  // Vector selects can have a scalar predicate. If so, splat it into a
+  // vector and finish here; a later legalization attempt will then see a
+  // vector mask and can try again.
+  if (MaskTy.isScalar()) {
+    Register MaskElt = MaskReg;
+    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+    // Generate a vector splat idiom to be pattern-matched later.
+    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+    Observer.changingInstr(MI);
+    MI.getOperand(1).setReg(ShufSplat.getReg(0));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
+  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
    return UnableToLegalize;
+  }
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
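
Once the mask is vector-typed and passes the size check, lowerSelect finishes
with the usual bitwise expansion Res = (Op1 & Mask) | (Op2 & ~Mask). A sketch
of the complete sequence, repeating the two builder calls visible above and
assuming the function erases the G_SELECT before returning:

  // Bitwise select expansion: Res = (Op1 & Mask) | (Op2 & ~Mask).
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;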
  return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
}
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+                                                        const SrcOp &Src) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT SrcTy = Src.getLLTTy(*getMRI());
+  assert(SrcTy == DstTy.getElementType() && "Expected Src to match Dst elt ty");
+  auto UndefVec = buildUndef(DstTy);
+  auto Zero = buildConstant(LLT::scalar(64), 0);
+  auto InsElt = buildInsertVectorElt(DstTy, UndefVec, Src, Zero);
+  SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+  return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+                                                         const SrcOp &Src1,
+                                                         const SrcOp &Src2,
+                                                         ArrayRef<int> Mask) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT Src1Ty = Src1.getLLTTy(*getMRI());
+  LLT Src2Ty = Src2.getLLTTy(*getMRI());
+  assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+  assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+         DstTy.getElementType() == Src2Ty.getElementType());
+  ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+  return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+      .addShuffleMask(MaskAlloc);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
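
A note on mask semantics for the new buildShuffleVector: as with
G_SHUFFLE_VECTOR itself, an index below the element count selects a lane of
Src1, a larger index selects from Src2, and -1 yields an undefined lane. A
small hypothetical example (reverseTopThree is not part of the patch):

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/Support/LowLevelTypeImpl.h"

  using namespace llvm;

  // Hypothetical helper: reverse the first three lanes of a <4 x s32>
  // vector and leave the last lane undefined. Indices 0-3 pick from Vec,
  // 4-7 would pick from the undef operand, and -1 marks an undef lane.
  static Register reverseTopThree(MachineIRBuilder &B, Register Vec) {
    LLT V4S32 = LLT::vector(4, 32);
    auto Undef = B.buildUndef(V4S32);
    return B.buildShuffleVector(V4S32, Vec, Undef, {2, 1, 0, -1}).getReg(0);
  }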
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
      .lowerIf(isVector(0));
  // Pointer-handling
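
The tightened predicate above means a G_SELECT with a scalar predicate no
longer triggers minScalarEltSameAsIf; it reaches lowerIf(isVector(0)) instead
and takes the new splat path in lowerSelect. For illustration, a hand-written
predicate equivalent to all(isVector(0), isVector(1)); bothVectors is a
hypothetical name:

  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

  using namespace llvm;

  // Illustrative only: fires when both the result (type index 0) and the
  // predicate (type index 1) of G_SELECT are vectors, so scalar-predicate
  // selects skip the element-size rule.
  static LegalityPredicate bothVectors() {
    return [](const LegalityQuery &Query) {
      return Query.Types[0].isVector() && Query.Types[1].isVector();
    };
  }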
    $q0 = COPY %5(<16 x s8>)
    RET_ReallyLR implicit $q0
...
+---
+name: scalar_mask
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT_INREG]](s32), [[C2]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s1) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s1), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...