SDLoc dl(InputVector);
bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
uint64_t Idx = CIdx->getZExtValue();
if (UndefVecElts[Idx])
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
- return DAG.getConstant(EltBits[Idx].zext(VT.getScalarSizeInBits()), dl,
- VT);
+ return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT);
+ }
+
+ // Fold extract_element(bitcast(<X x i1>)) -> bitcast(extract_subvector()).
+ // Improves lowering of Rust bool masks, which are split into byte arrays.
+ if (InputVector.getOpcode() == ISD::BITCAST && (NumEltBits % 8) == 0) {
+ SDValue Src = peekThroughBitcasts(InputVector);
+ if (Src.getValueType().getScalarType() == MVT::i1 &&
+ TLI.isTypeLegal(Src.getValueType())) {
+ MVT SubVT = MVT::getVectorVT(MVT::i1, NumEltBits);
+ SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Src,
+ DAG.getIntPtrConstant(CIdx->getZExtValue() * NumEltBits, dl));
+ return DAG.getBitcast(VT, Sub);
+ }
}
}
if (IsPextr) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnes(VT.getSizeInBits()), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
+ DCI))
return SDValue(N, 0);
// PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
-; AVX-LABEL: bitcast_v16i8_to_v2i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovmskb %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: shrl $8, %eax
-; AVX-NEXT: addb %cl, %al
-; AVX-NEXT: # kill: def $al killed $al killed $eax
-; AVX-NEXT: retq
+; AVX12-LABEL: bitcast_v16i8_to_v2i8:
+; AVX12: # %bb.0:
+; AVX12-NEXT: vpmovmskb %xmm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrl $8, %eax
+; AVX12-NEXT: addb %cl, %al
+; AVX12-NEXT: # kill: def $al killed $al killed $eax
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: bitcast_v16i8_to_v2i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm0, %k0
+; AVX512-NEXT: kshiftrw $8, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
+; AVX512-NEXT: addb %cl, %al
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%1 = icmp slt <16 x i8> %a0, zeroinitializer
%2 = bitcast <16 x i1> %1 to <2 x i8>
%3 = extractelement <2 x i8> %2, i32 0
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %ymm0, %k0
-; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vmovd %xmm0, %ecx
-; AVX512-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512-NEXT: kshiftrw $8, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovmskb %ymm0, %ecx
-; AVX512-NEXT: movl %ecx, %eax
-; AVX512-NEXT: shrl $16, %eax
+; AVX512-NEXT: vpmovb2m %ymm0, %k0
+; AVX512-NEXT: kshiftrd $16, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
-; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vmovd %xmm0, %ecx
-; AVX512-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512-NEXT: kshiftrw $8, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %zmm0, %k0
-; AVX512-NEXT: kmovd %k0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vmovd %xmm0, %ecx
-; AVX512-NEXT: vpextrw $1, %xmm0, %eax
+; AVX512-NEXT: kshiftrd $16, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0
-; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT: addl -{{[0-9]+}}(%rsp), %eax
+; AVX512-NEXT: kshiftrq $32, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: kmovd %k1, %eax
+; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = icmp slt <64 x i8> %a0, zeroinitializer
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vcmpunordps %zmm1, %zmm0, %k0
-; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: vpextrb $1, %xmm0, %edx
+; AVX512-NEXT: kshiftrw $8, %k0, %k1
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: kmovd %k1, %edx
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: # kill: def $dl killed $dl killed $edx
; AVX512-NEXT: vzeroupper