if (llvm::all_of(Ops, [Op0](SDValue Op) {
return Op.getOpcode() == Op0.getOpcode();
})) {
- auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
+ auto ConcatSubOperand = [&](EVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
SmallVector<SDValue> Subs;
for (SDValue SubOp : SubOps)
Subs.push_back(SubOp.getOperand(I));
}
}
break;
+ case ISD::TRUNCATE: // Fold concat(trunc(X), trunc(Y)) into one trunc of the concatenated sources.
+ if (!IsSplat && NumOps == 2 && VT.is256BitVector()) { // Requires !IsSplat, NumOps == 2 and a 256-bit result type.
+ EVT SrcVT = Ops[0].getOperand(0).getValueType(); // Type of the value being truncated by the first operand.
+ if (SrcVT.is256BitVector() && SrcVT.isSimple() && // Source must be a simple 256-bit vector type...
+ SrcVT == Ops[1].getOperand(0).getValueType() && // ...and both truncates must have the same source type.
+ Subtarget.useAVX512Regs() && // The doubled source type is twice as wide as the 256-bit source.
+ Subtarget.getPreferVectorWidth() >= 512 && // Skip when the preferred vector width is below 512.
+ (SrcVT.getScalarSizeInBits() > 16 || Subtarget.useBWIRegs())) { // Source elements of 16 bits or fewer also need useBWIRegs().
+ EVT NewSrcVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext()); // Same element type, twice the element count.
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, // Single truncate straight to the 256-bit result type.
+ ConcatSubOperand(NewSrcVT, Ops, 0)); // Built from operand 0 of each op; helper body only partly visible here - confirm.
+ }
+ }
+ break; // Conditions not met: no fold for this opcode.
case X86ISD::VSHLI:
case X86ISD::VSRLI:
// Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle.
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
-; AVX512F-LABEL: trunc2x4i64_8i32:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc2x4i64_8i32:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc2x4i64_8i32:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc2x4i64_8i32:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc2x4i64_8i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
entry:
%0 = trunc <4 x i64> %a to <4 x i32>
%1 = trunc <4 x i64> %b to <4 x i32>
;
; AVX512BW-LABEL: trunc2x16i16_32i8:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8:
; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %ymm1, %xmm1
-; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
%0 = trunc <16 x i16> %a to <16 x i8>