From f9a89b67885b21a2d0c76a7d6dd8ee9038cf28ac Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 17 Sep 2019 18:02:56 +0000
Subject: [PATCH] [X86] Simplify b2b KSHIFTL+KSHIFTR using demanded elts.

llvm-svn: 372155
---
 llvm/lib/Target/X86/X86ISelLowering.cpp          | 79 ++++++++++++++++++++----
 llvm/test/CodeGen/X86/avx512-ext.ll              | 24 +++----
 llvm/test/CodeGen/X86/avx512-mask-op.ll          | 39 ++++--------
 llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll |  3 +-
 llvm/test/CodeGen/X86/vec_smulo.ll               |  3 +-
 llvm/test/CodeGen/X86/vec_umulo.ll               |  3 +-
 6 files changed, 90 insertions(+), 61 deletions(-)
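
The combine folds KSHIFTR(KSHIFTL(X, C1), ShAmt) and the mirrored
KSHIFTL(KSHIFTR(X, C1), ShAmt) into a single shift by |ShAmt - C1| whenever
the lanes the pair shifts out are never demanded. In the tests below, the
kshiftlw $15 + kshiftrw $1 pairs become kshiftlw $14, for example, because
the surrounding code never reads the top lane. A minimal standalone C++
sketch of that instance (a plain integer model of the 16-lane mask shifts,
not LLVM API; the helper names are illustrative only):

  #include <cassert>
  #include <cstdint>

  // kshiftlw/kshiftrw semantics on a 16-lane mask register: lanes shifted
  // out are dropped, vacated lanes become zero.
  static uint16_t kshiftlw(uint16_t K, unsigned A) { return uint16_t(K << A); }
  static uint16_t kshiftrw(uint16_t K, unsigned A) { return uint16_t(K >> A); }

  int main() {
    // ShAmt = 1, C1 = 15, Diff = 1 - 15 = -14, so the pair becomes a single
    // KSHIFTL by 14. The two forms can differ only in the top ShAmt = 1
    // lanes, which are not demanded here.
    const uint16_t DemandedLow15 = 0x7FFF;
    for (unsigned K = 0; K <= 0xFFFF; ++K) {
      uint16_t Pair   = kshiftrw(kshiftlw(uint16_t(K), 15), 1);
      uint16_t Single = kshiftlw(uint16_t(K), 14);
      assert((Pair & DemandedLow15) == (Single & DemandedLow15));
    }
    return 0;
  }

Exhausting all 2^16 masks confirms the two forms agree on every demanded lane.
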
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4a6aeb7..36d89a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34627,29 +34627,82 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     // TODO convert SrcUndef to KnownUndef.
     break;
   }
-  case X86ISD::KSHIFTL:
-  case X86ISD::KSHIFTR: {
+  case X86ISD::KSHIFTL: {
     SDValue Src = Op.getOperand(0);
     auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
     assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
     unsigned ShiftAmt = Amt->getZExtValue();
-    bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
-    APInt DemandedSrc =
-        ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
+    if (ShiftAmt == 0)
+      return TLO.CombineTo(Op, Src);
+
+    // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+    // single shift. We can do this if the bottom bits (which are shifted
+    // out) are never demanded.
+    if (Src.getOpcode() == X86ISD::KSHIFTR) {
+      if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
+        unsigned C1 = Src.getConstantOperandVal(1);
+        unsigned Opc = X86ISD::KSHIFTL;
+        int Diff = ShiftAmt - C1;
+        if (Diff < 0) {
+          Diff = -Diff;
+          Opc = X86ISD::KSHIFTR;
+        }
+
+        SDLoc dl(Op);
+        SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
+      }
+    }
+
+    APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
     if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
                                    Depth + 1))
       return true;
-    if (ShiftLeft) {
-      KnownUndef = KnownUndef.shl(ShiftAmt);
-      KnownZero = KnownZero.shl(ShiftAmt);
-      KnownZero.setLowBits(ShiftAmt);
-    } else {
-      KnownUndef = KnownUndef.lshr(ShiftAmt);
-      KnownZero = KnownZero.lshr(ShiftAmt);
-      KnownZero.setHighBits(ShiftAmt);
+    KnownUndef = KnownUndef.shl(ShiftAmt);
+    KnownZero = KnownZero.shl(ShiftAmt);
+    KnownZero.setLowBits(ShiftAmt);
+    break;
+  }
+  case X86ISD::KSHIFTR: {
+    SDValue Src = Op.getOperand(0);
+    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+    unsigned ShiftAmt = Amt->getZExtValue();
+
+    if (ShiftAmt == 0)
+      return TLO.CombineTo(Op, Src);
+
+    // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+    // single shift. We can do this if the top bits (which are shifted
+    // out) are never demanded.
+    if (Src.getOpcode() == X86ISD::KSHIFTL) {
+      if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
+        unsigned C1 = Src.getConstantOperandVal(1);
+        unsigned Opc = X86ISD::KSHIFTR;
+        int Diff = ShiftAmt - C1;
+        if (Diff < 0) {
+          Diff = -Diff;
+          Opc = X86ISD::KSHIFTL;
+        }
+
+        SDLoc dl(Op);
+        SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
+      }
     }
+
+    APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
+    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+                                   Depth + 1))
+      return true;
+
+    KnownUndef = KnownUndef.lshr(ShiftAmt);
+    KnownZero = KnownZero.lshr(ShiftAmt);
+    KnownZero.setHighBits(ShiftAmt);
     break;
   }
   case X86ISD::CVTSI2P:
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 87fee83..20af819 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1974,8 +1974,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k3
 ; KNL-NEXT:    kxorw %k3, %k2, %k2
-; KNL-NEXT:    kshiftlw $15, %k2, %k2
-; KNL-NEXT:    kshiftrw $1, %k2, %k2
+; KNL-NEXT:    kshiftlw $14, %k2, %k2
 ; KNL-NEXT:    kxorw %k2, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
@@ -2074,8 +2073,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k4
 ; KNL-NEXT:    kxorw %k4, %k3, %k3
-; KNL-NEXT:    kshiftlw $15, %k3, %k3
-; KNL-NEXT:    kshiftrw $1, %k3, %k3
+; KNL-NEXT:    kshiftlw $14, %k3, %k3
 ; KNL-NEXT:    kxorw %k3, %k2, %k2
 ; KNL-NEXT:    kshiftlw $1, %k2, %k2
 ; KNL-NEXT:    kshiftrw $1, %k2, %k2
@@ -2174,8 +2172,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kxorw %k5, %k4, %k4
-; KNL-NEXT:    kshiftlw $15, %k4, %k4
-; KNL-NEXT:    kshiftrw $1, %k4, %k4
+; KNL-NEXT:    kshiftlw $14, %k4, %k4
 ; KNL-NEXT:    kxorw %k4, %k3, %k3
 ; KNL-NEXT:    kshiftlw $1, %k3, %k3
 ; KNL-NEXT:    kshiftrw $1, %k3, %k3
@@ -2274,8 +2271,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kxorw %k5, %k4, %k4
-; KNL-NEXT:    kshiftlw $15, %k4, %k4
-; KNL-NEXT:    kshiftrw $1, %k4, %k4
+; KNL-NEXT:    kshiftlw $14, %k4, %k4
 ; KNL-NEXT:    kxorw %k4, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
@@ -2396,8 +2392,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k3
 ; AVX512DQNOBW-NEXT:    kxorw %k3, %k2, %k2
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k2, %k2
+; AVX512DQNOBW-NEXT:    kshiftlw $14, %k2, %k2
 ; AVX512DQNOBW-NEXT:    kxorw %k2, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
@@ -2496,8 +2491,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k4
 ; AVX512DQNOBW-NEXT:    kxorw %k4, %k3, %k3
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k3, %k3
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k3, %k3
+; AVX512DQNOBW-NEXT:    kshiftlw $14, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kxorw %k3, %k2, %k2
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k2, %k2
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k2, %k2
@@ -2596,8 +2590,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
 ; AVX512DQNOBW-NEXT:    kxorw %k5, %k4, %k4
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k4, %k4
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k4, %k4
+; AVX512DQNOBW-NEXT:    kshiftlw $14, %k4, %k4
 ; AVX512DQNOBW-NEXT:    kxorw %k4, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k3, %k3
@@ -2696,8 +2689,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
 ; AVX512DQNOBW-NEXT:    kxorw %k5, %k4, %k4
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k4, %k4
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k4, %k4
+; AVX512DQNOBW-NEXT:    kshiftlw $14, %k4, %k4
 ; AVX512DQNOBW-NEXT:    kxorw %k4, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 3e59e6e..a28816d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1283,8 +1283,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
 ; KNL-NEXT:    kshiftrw $6, %k0, %k3
 ; KNL-NEXT:    kxorw %k1, %k3, %k1
-; KNL-NEXT:    kshiftlw $15, %k1, %k1
-; KNL-NEXT:    kshiftrw $9, %k1, %k1
+; KNL-NEXT:    kshiftlw $6, %k1, %k1
 ; KNL-NEXT:    kxorw %k1, %k0, %k0
 ; KNL-NEXT:    kshiftlw $9, %k0, %k0
 ; KNL-NEXT:    kshiftrw $9, %k0, %k0
@@ -1304,8 +1303,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; SKX-NEXT:    kshiftrw $9, %k1, %k1
 ; SKX-NEXT:    kshiftrb $6, %k0, %k3
 ; SKX-NEXT:    kxorb %k1, %k3, %k1
-; SKX-NEXT:    kshiftlb $7, %k1, %k1
-; SKX-NEXT:    kshiftrb $1, %k1, %k1
+; SKX-NEXT:    kshiftlb $6, %k1, %k1
 ; SKX-NEXT:    kxorb %k1, %k0, %k0
 ; SKX-NEXT:    kshiftlb $1, %k0, %k0
 ; SKX-NEXT:    kshiftrb $1, %k0, %k0
@@ -1322,8 +1320,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
 ; AVX512BW-NEXT:    kshiftrw $6, %k0, %k3
 ; AVX512BW-NEXT:    kxorw %k1, %k3, %k1
-; AVX512BW-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
+; AVX512BW-NEXT:    kshiftlw $6, %k1, %k1
 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
 ; AVX512BW-NEXT:    kshiftlw $9, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $9, %k0, %k0
@@ -1342,8 +1339,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k3
 ; AVX512DQ-NEXT:    kxorb %k1, %k3, %k1
-; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
-; AVX512DQ-NEXT:    kshiftrb $1, %k1, %k1
+; AVX512DQ-NEXT:    kshiftlb $6, %k1, %k1
 ; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrb $1, %k0, %k0
@@ -1363,8 +1359,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; X86-NEXT:    kshiftrw $9, %k1, %k1
 ; X86-NEXT:    kshiftrb $6, %k0, %k3
 ; X86-NEXT:    kxorb %k1, %k3, %k1
-; X86-NEXT:    kshiftlb $7, %k1, %k1
-; X86-NEXT:    kshiftrb $1, %k1, %k1
+; X86-NEXT:    kshiftlb $6, %k1, %k1
 ; X86-NEXT:    kxorb %k1, %k0, %k0
 ; X86-NEXT:    kshiftlb $1, %k0, %k0
 ; X86-NEXT:    kshiftrb $1, %k0, %k0
@@ -2842,8 +2837,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k3
 ; KNL-NEXT:    kxorw %k3, %k2, %k2
-; KNL-NEXT:    kshiftlw $15, %k2, %k2
-; KNL-NEXT:    kshiftrw $1, %k2, %k2
+; KNL-NEXT:    kshiftlw $14, %k2, %k2
 ; KNL-NEXT:    kxorw %k2, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
@@ -2942,8 +2936,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k4
 ; KNL-NEXT:    kxorw %k4, %k3, %k3
-; KNL-NEXT:    kshiftlw $15, %k3, %k3
-; KNL-NEXT:    kshiftrw $1, %k3, %k3
+; KNL-NEXT:    kshiftlw $14, %k3, %k3
 ; KNL-NEXT:    kxorw %k3, %k2, %k2
 ; KNL-NEXT:    kshiftlw $1, %k2, %k2
 ; KNL-NEXT:    kshiftrw $1, %k2, %k2
@@ -3042,8 +3035,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kxorw %k5, %k4, %k4
-; KNL-NEXT:    kshiftlw $15, %k4, %k4
-; KNL-NEXT:    kshiftrw $1, %k4, %k4
+; KNL-NEXT:    kshiftlw $14, %k4, %k4
 ; KNL-NEXT:    kxorw %k4, %k3, %k3
 ; KNL-NEXT:    kshiftlw $1, %k3, %k3
 ; KNL-NEXT:    kshiftrw $1, %k3, %k3
@@ -3142,8 +3134,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kxorw %k5, %k4, %k4
-; KNL-NEXT:    kshiftlw $15, %k4, %k4
-; KNL-NEXT:    kshiftrw $1, %k4, %k4
+; KNL-NEXT:    kshiftlw $14, %k4, %k4
 ; KNL-NEXT:    kxorw %k4, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
@@ -3264,8 +3255,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k3
 ; AVX512DQ-NEXT:    kxorw %k3, %k2, %k2
-; AVX512DQ-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512DQ-NEXT:    kshiftrw $1, %k2, %k2
+; AVX512DQ-NEXT:    kshiftlw $14, %k2, %k2
 ; AVX512DQ-NEXT:    kxorw %k2, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
@@ -3364,8 +3354,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k4
 ; AVX512DQ-NEXT:    kxorw %k4, %k3, %k3
-; AVX512DQ-NEXT:    kshiftlw $15, %k3, %k3
-; AVX512DQ-NEXT:    kshiftrw $1, %k3, %k3
+; AVX512DQ-NEXT:    kshiftlw $14, %k3, %k3
 ; AVX512DQ-NEXT:    kxorw %k3, %k2, %k2
 ; AVX512DQ-NEXT:    kshiftlw $1, %k2, %k2
 ; AVX512DQ-NEXT:    kshiftrw $1, %k2, %k2
@@ -3464,8 +3453,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k5
 ; AVX512DQ-NEXT:    kxorw %k5, %k4, %k4
-; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
-; AVX512DQ-NEXT:    kshiftrw $1, %k4, %k4
+; AVX512DQ-NEXT:    kshiftlw $14, %k4, %k4
 ; AVX512DQ-NEXT:    kxorw %k4, %k3, %k3
 ; AVX512DQ-NEXT:    kshiftlw $1, %k3, %k3
 ; AVX512DQ-NEXT:    kshiftrw $1, %k3, %k3
@@ -3564,8 +3552,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k5
 ; AVX512DQ-NEXT:    kxorw %k5, %k4, %k4
-; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
-; AVX512DQ-NEXT:    kshiftrw $1, %k4, %k4
+; AVX512DQ-NEXT:    kshiftlw $14, %k4, %k4
 ; AVX512DQ-NEXT:    kxorw %k4, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $1, %k1, %k1
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index 6e353d0..f7ee5b2 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -23526,8 +23526,7 @@ define i8 @mask_zero_lower(<4 x i32> %a) {
 ; NoVLX:       # %bb.0:
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
-; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
+; NoVLX-NEXT:    kshiftlw $4, %k0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %eax
 ; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
 ; NoVLX-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index a554ad0..b809e55 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -2252,8 +2252,7 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
 ; AVX512-NEXT:    kxorw %k0, %k2, %k2
 ; AVX512-NEXT:    kshiftrw $2, %k2, %k3
 ; AVX512-NEXT:    kxorw %k1, %k3, %k1
-; AVX512-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512-NEXT:    kshiftrw $13, %k1, %k1
+; AVX512-NEXT:    kshiftlw $2, %k1, %k1
 ; AVX512-NEXT:    kxorw %k1, %k2, %k1
 ; AVX512-NEXT:    kshiftlw $13, %k1, %k1
 ; AVX512-NEXT:    kshiftrw $13, %k1, %k1
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index dfe720f..07899d0 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -1988,8 +1988,7 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
 ; AVX512-NEXT:    kxorw %k0, %k2, %k2
 ; AVX512-NEXT:    kshiftrw $2, %k2, %k3
 ; AVX512-NEXT:    kxorw %k1, %k3, %k1
-; AVX512-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512-NEXT:    kshiftrw $13, %k1, %k1
+; AVX512-NEXT:    kshiftlw $2, %k1, %k1
 ; AVX512-NEXT:    kxorw %k1, %k2, %k1
 ; AVX512-NEXT:    kshiftlw $13, %k1, %k1
 ; AVX512-NEXT:    kshiftrw $13, %k1, %k1
-- 
2.7.4