const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ShuffleVT = N.getValueType();
- auto IsMergeableWithShuffle = [](SDValue Op, bool FoldLoad = false) {
+ auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) {
// AllZeros/AllOnes constants are freely shuffled and will peek through
// bitcasts. Other constant build vectors do not peek through bitcasts. Only
// merge with target shuffles if it has one use so shuffle combining is
- // likely to kick in.
+ // likely to kick in. Shuffles of undef-free splats should be removed.
return ISD::isBuildVectorAllOnes(Op.getNode()) ||
ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
(isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
- (FoldLoad && isShuffleFoldableLoad(Op));
+ (FoldLoad && isShuffleFoldableLoad(Op)) ||
+ DAG.isSplatValue(Op, /*AllowUndefs*/ false);
};
auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
// Ensure we only shuffle whole vector src elements, unless its a logical
; KNL-LABEL: fsub_noundef_ee:
; KNL: # %bb.0:
; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
-; KNL-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; KNL-NEXT: vsubpd %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; KNL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fsub_noundef_ee:
;
; AVX2-LABEL: all_sign_bit_ashr_vec1:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX2-LABEL: all_sign_bit_ashr_vec3:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq