unsigned Opcode;
int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
if (ShiftAmt >= 0) {
- // FIXME: We can't easily widen an illegal right shift if we need to shift
- // in zeroes.
- if (Opcode == X86ISD::KSHIFTR &&
- (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)))
- return DAG.getNode(Opcode, DL, VT, V,
- DAG.getConstant(ShiftAmt, DL, MVT::i8));
- if (Opcode == X86ISD::KSHIFTL) {
- // If this is a shift left we can widen the VT to a suported kshiftl.
- MVT WideVT = VT;
- if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
- WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
- DAG.getUNDEF(WideVT), V,
- DAG.getIntPtrConstant(0, DL));
- Res = DAG.getNode(Opcode, DL, WideVT, Res,
- DAG.getConstant(ShiftAmt, DL, MVT::i8));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ MVT WideVT = VT;
+ if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+ WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+ DAG.getUNDEF(WideVT), V,
+ DAG.getIntPtrConstant(0, DL));
+ // Widened right shifts need two shifts to ensure we shift in zeroes.
+ if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {
+ int WideElts = WideVT.getVectorNumElements();
+ // Shift left to put the original vector in the MSBs of the new size.
+ Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
+ DAG.getConstant(WideElts - NumElts, DL, MVT::i8));
+ // Increase the shift amount to account for the left shift.
+ ShiftAmt += WideElts - NumElts;
+ }
+
+ Res = DAG.getNode(Opcode, DL, WideVT, Res,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
Offset += NumElts; // Increment for next iteration.
}
define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; KNL-LABEL: kshiftr_v8i1_1:
; KNL: # %bb.0:
-; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,2,3,4,5,6,7,15]
-; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
-; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT: vptestmq %zmm3, %zmm3, %k0 {%k1}
+; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kshiftlw $8, %k0, %k0
+; KNL-NEXT: kshiftrw $9, %k0, %k1
+; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
; KNL-LABEL: kshiftr_v8i1_1u3u567z:
; KNL: # %bb.0:
-; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = <1,u,3,u,5,6,7,15>
-; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
-; KNL-NEXT: vpsllq $63, %zmm3, %zmm0
-; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kshiftlw $8, %k0, %k0
+; KNL-NEXT: kshiftrw $9, %k0, %k1
+; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper