    KnownZero.insertBits(SubZero, SubIdx);
    break;
  }
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Scl = Op.getOperand(1);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+    // For a legal, constant insertion index, if we don't need this insertion
+    // then strip it, else remove it from the demanded elts.
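+    // e.g. if only element 0 of (insert_vector_elt v, s, 2) is demanded, the
+    // insert is redundant and we can use v directly.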
+    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+      unsigned Idx = CIdx->getZExtValue();
+      if (!DemandedElts[Idx])
+        return TLO.CombineTo(Op, Vec);
+      DemandedElts.clearBit(Idx);
+
+      if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+                                     KnownZero, TLO, Depth + 1))
+        return true;
+
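+      // The inserted element overrides whatever the base vector held in lane
+      // Idx, so recompute that lane's known undef/zero state from Scl alone.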
+      KnownUndef.clearBit(Idx);
+      if (Scl.isUndef())
+        KnownUndef.setBit(Idx);
+
+      KnownZero.clearBit(Idx);
+      if (isNullConstant(Scl) || isNullFPConstant(Scl))
+        KnownZero.setBit(Idx);
+      break;
+    }
+
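+    // The index is variable (or out of range), so Scl may end up in any lane;
+    // we can still simplify the base vector against the full demanded set.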
+    APInt VecUndef, VecZero;
+    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+                                   Depth + 1))
+      return true;
+    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+    break;
+  }
  case ISD::VSELECT: {
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
+; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movzwl %si, %ecx
-; X64-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
+; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
; KNL_64-LABEL: test14:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; KNL_64-NEXT: vpbroadcastq %xmm0, %zmm0
; KNL_64-NEXT: vmovd %esi, %xmm1
; KNL_64-NEXT: vpbroadcastd %xmm1, %ymm1
;
; KNL_32-LABEL: test14:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL_32-NEXT: vpbroadcastd %xmm0, %zmm0
; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; KNL_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
;
; SKX-LABEL: test14:
; SKX: # %bb.0:
-; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; SKX-NEXT: vpbroadcastq %xmm0, %zmm0
; SKX-NEXT: vpbroadcastd %esi, %ymm1
; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
;
; SKX_32-LABEL: test14:
; SKX_32: # %bb.0:
-; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT: vpbroadcastd %xmm0, %zmm0
; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; SKX_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
; X32-LABEL: mmx_movzl:
; X32: ## %bb.0:
; X32-NEXT: subl $20, %esp
-; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X32-NEXT: movl $32, %eax
-; X32-NEXT: pinsrd $0, %eax, %xmm0
-; X32-NEXT: pxor %xmm1, %xmm1
-; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; X32-NEXT: movq %xmm1, (%esp)
+; X32-NEXT: movd %eax, %xmm0
+; X32-NEXT: movq %xmm0, (%esp)
; X32-NEXT: movq (%esp), %mm0
; X32-NEXT: addl $20, %esp
; X32-NEXT: retl