}
if (Subtarget.hasVBMI2()) {
- for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
- if (Subtarget.hasVBMI2()) {
- // TODO: Make these legal even without VLX?
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
- }
- }
-
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
if (IsFSHR)
std::swap(Op0, Op1);
+ // With AVX512 but without VLX, we need to widen to get a 512-bit result type.
+ if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
+ Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
+ Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
+ }
+
+ SDValue Funnel;
APInt APIntShiftAmt;
+ MVT ResultVT = Op0.getSimpleValueType();
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
- Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
- }
-
- return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
- Op0, Op1, Amt);
+ Funnel =
+ DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
+ Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ } else {
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
+ Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
+ ResultVT, Op0, Op1, Amt);
+ }
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
+ return Funnel;
}
assert(
(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
;
; AVX512VBMI2-LABEL: var_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: var_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvd %xmm3, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14]
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8]
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $50, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllq $14, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $28, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpslld $4, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $9, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
;
; AVX512VBMI2-LABEL: var_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: var_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2
-; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvq %ymm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpslld %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3
-; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60]
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11]
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $50, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllq $14, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $28, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpslld $4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $9, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: var_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: var_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpsrld %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14]
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8]
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $14, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllq $50, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpslld $28, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllw $9, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
;
; AVX512VBMI2-LABEL: var_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: var_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2
-; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvd %ymm4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpsrld %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3
-; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60]
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11]
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i32:
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $14, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllq $50, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpslld $28, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $7, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllw $9, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: