return true;
}
-// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
-// mask.
-// TODO: Do we need this? It might be better to use Mask+Zeroable directly.
-static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
- const APInt &Zeroable) {
- int NumElts = Mask.size();
- assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
-
- SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
- for (int i = 0; i != NumElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
- TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
- }
- return TargetMask;
-}
-
// Attempt to create a shuffle mask from a VSELECT condition mask.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
SDValue Cond) {
static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
- MutableArrayRef<int> TargetMask,
- bool &ForceV1Zero, bool &ForceV2Zero,
- uint64_t &BlendMask) {
+ MutableArrayRef<int> Mask,
+ const APInt &Zeroable, bool &ForceV1Zero,
+ bool &ForceV2Zero, uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
- assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
+ assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
- // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
- int M = TargetMask[i];
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i)
continue;
if (M == i + Size) {
BlendMask |= 1ull << i;
continue;
}
- if (M == SM_SentinelZero) {
+ if (Zeroable[i]) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
- TargetMask[i] = i;
+ Mask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
- TargetMask[i] = i + Size;
+ Mask[i] = i + Size;
continue;
}
}
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
-
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
- if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
- BlendMask))
+ SmallVector<int, 64> Mask(Original.begin(), Original.end());
+ if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero,
+ ForceV2Zero, BlendMask))
return SDValue();
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
- if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
- BlendMask)) {
+ if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
+ ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
;
; AVX1-LABEL: packsswb_icmp_zero_trunc_256:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = zero,zero,ymm0[0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;