SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
if (Op.getSimpleValueType() == MVT::i1)
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return SetCC;
}
return false;
}
-/// Returns the "condition" node, that may be wrapped with "truncate".
-/// Like this: (i1 (trunc (i8 X86ISD::SETCC))).
-static SDValue getCondAfterTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
// Only an ISD::TRUNCATE node is a candidate; anything else is rejected here.
if (V.getOpcode() != ISD::TRUNCATE)
- return V;
+ return false;
// InBits is the width of the truncate's input and Bits the width of its
// result, so the mask built below covers the InBits - Bits high bits of the
// input, i.e. exactly the bits the truncate discards.
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
- if (DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)))
- return V.getOperand(0);
- return V;
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
// Look past the truncate if the high bits are known zero.
- Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (addTest) {
// Look past the truncate if the high bits are known zero.
- Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result is compared against zero. Try to match it to BT.
if (Cond.hasOneUse()) {
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
DAG.getConstant(X86::COND_O, DL, MVT::i32),
SDValue(Sum.getNode(), 2));
- if (N->getValueType(1) == MVT::i1)
- SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
DAG.getConstant(Cond, DL, MVT::i32),
SDValue(Sum.getNode(), 1));
- if (N->getValueType(1) == MVT::i1)
+ if (N->getValueType(1) == MVT::i1) {
+ SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
+ DAG.getValueType(MVT::i1));
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+ }
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(const SDLoc &DL, SDValue EFLAGS,
- SelectionDAG &DAG) {
- return DAG.getNode(ISD::AND, DL, MVT::i8,
+ SelectionDAG &DAG, MVT VT) {
+ // VT selects the result type: for i8 the SETCC_CARRY value is masked with 1,
+ // for i1 (the only other type accepted) it is truncated instead.
+ if (VT == MVT::i8)
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ EFLAGS),
+ DAG.getConstant(1, DL, VT));
+ assert(VT == MVT::i1 && "Unexpected type for SETCC node");
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS),
- DAG.getConstant(1, DL, MVT::i8));
+ EFLAGS));
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return MaterializeSETB(DL, NewEFLAGS, DAG);
+ return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0));
}
}
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
if (CC == X86::COND_B)
- return MaterializeSETB(DL, EFLAGS, DAG);
+ return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
(KMOVQkm addr:$src)>;
}
-def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{
- return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
-}]>;
-
-def trunc_setcc : PatFrag<(ops node:$src), (trunc node:$src), [{
- return (N->getOperand(0)->getOpcode() == X86ISD::SETCC);
-}]>;
-
-def trunc_mask_1 : PatFrag<(ops node:$src), (trunc node:$src), [{
- return (N->getOperand(0)->getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)) &&
- N->getOperand(0)->getConstantOperandVal(1) == 1);
-}]>;
-
-
let Predicates = [HasAVX512] in {
// GPR integer -> i1 (VK1) truncation patterns. Each added (+) pattern masks
// bit 0 with AND32ri8 on a 32-bit value and moves the result into a mask
// register with KMOVWkr before the COPY_TO_REGCLASS to VK1.
def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND64ri8 $src, (i64 1)),
- sub_16bit)), VK1)>;
-
- def : Pat<(i1 (trunc (i64 (assertzext_i1 GR64:$src)))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
-
- def : Pat<(i1 (trunc_mask_1 GR64:$src)),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND32ri8 $src, (i32 1)),
- sub_16bit)), VK1)>;
-
- def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
-
- def : Pat<(i1 (trunc_mask_1 GR32:$src)),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i8 GR8:$src))),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri $src, (i8 1)),
- sub_8bit)), VK1)>;
-
- def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;
-
- def : Pat<(i1 (trunc_setcc GR8:$src)),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;
-
- def : Pat<(i1 (trunc_mask_1 GR8:$src)),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;
-
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
+ VK1)>;
def : Pat<(i1 (trunc (i16 GR16:$src))),
- (COPY_TO_REGCLASS (AND16ri GR16:$src, (i16 1)), VK1)>;
-
- def : Pat<(i1 (trunc (i16 (assertzext_i1 GR16:$src)))),
- (COPY_TO_REGCLASS $src, VK1)>;
-
- def : Pat<(i1 (trunc_mask_1 GR16:$src)),
- (COPY_TO_REGCLASS $src, VK1)>;
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
+ VK1)>;
// i1 (VK1) -> GPR integer extension patterns. In the added (+) patterns the
// mask is copied to VK16 and moved to a GPR with KMOVWrk; the zext forms then
// AND the moved value with 1, while the anyext forms use it unmasked.
def : Pat<(i32 (zext VK1:$src)),
- (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
-
+ (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
- (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
def : Pat<(i8 (zext VK1:$src)),
- (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS VK1:$src, GR16)), sub_8bit))>;
-
+ (EXTRACT_SUBREG
+ (AND32ri8 (KMOVWrk
+ (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
def : Pat<(i8 (anyext VK1:$src)),
- (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS $src, GR16)), sub_8bit))>;
+ (EXTRACT_SUBREG
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
- (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
-
+ (AND64ri8 (SUBREG_TO_REG (i64 0),
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
def : Pat<(i64 (anyext VK1:$src)),
- (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (SUBREG_TO_REG (i64 0),
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;
def : Pat<(i16 (zext VK1:$src)),
- (COPY_TO_REGCLASS $src, GR16)>;
-
+ (EXTRACT_SUBREG
+ (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
+ sub_16bit)>;
def : Pat<(i16 (anyext VK1:$src)),
- (i16 (COPY_TO_REGCLASS $src, GR16))>;
+ (EXTRACT_SUBREG
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+ sub_16bit)>;
}
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
; ALL-NEXT: kmovw %edx, %k0
; ALL-NEXT: cmpq %rsi, %rdi
; ALL-NEXT: sete %al
+; ALL-NEXT: andl $1, %eax
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: korw %k1, %k0, %k1
; ALL-NEXT: kxorw %k1, %k0, %k0
; ALL-NEXT: kmovw %k0, %eax
+; ALL-NEXT: andl $1, %eax
; ALL-NEXT: testb %al, %al
; ALL-NEXT: je LBB8_1
; ALL-NEXT: ## BB#2: ## %if.end.i
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r15d, %xmm4
-; KNL-NEXT: kmovw %k0, %r15d
+; KNL-NEXT: vmovd %r13d, %xmm4
+; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vpinsrb $4, %r8d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k0, %r13d
+; KNL-NEXT: vpinsrb $5, %edi, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %esi, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %edi, %xmm4, %xmm4
+; KNL-NEXT: vpinsrb $7, %esi, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: vpinsrb $8, %ebx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: vpinsrb $9, %ebp, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %r10d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: vpinsrb $10, %r14d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %r11d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: vpinsrb $11, %r15d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %ebx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: vpinsrb $12, %r12d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edi
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %ebp, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: vpinsrb $13, %r10d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: vpinsrb $14, %r9d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r14d
+; KNL-NEXT: vpinsrb $15, %r13d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %eax, %xmm5
-; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: vmovd %ecx, %xmm5
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $2, %r11d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: vpinsrb $3, %r8d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $7, %ebx, %xmm5, %xmm5
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: vpinsrb $8, %ebp, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: vpinsrb $9, %r14d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: vpinsrb $10, %r15d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: vpinsrb $11, %edi, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: vpinsrb $12, %r10d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: vpinsrb $13, %r9d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r14d
+; KNL-NEXT: vpinsrb $15, %r13d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %eax, %xmm6
-; KNL-NEXT: kmovw %k0, %r15d
+; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r13d
+; KNL-NEXT: vpinsrb $4, %r8d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $5, %edx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: vpinsrb $7, %ebp, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: vpinsrb $8, %ebx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: vpinsrb $9, %r11d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: vpinsrb $10, %edi, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %edi
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: vpinsrb $11, %r10d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: vpinsrb $12, %r9d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: vpinsrb $13, %r14d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r14d
+; KNL-NEXT: vpinsrb $14, %r15d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftrw $15, %k1, %k0
-; KNL-NEXT: vmovd %r12d, %xmm7
+; KNL-NEXT: vpinsrb $15, %r12d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $2, %edx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $3, %r13d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $4, %eax, %xmm7, %xmm7
+; KNL-NEXT: kshiftrw $15, %k1, %k0
+; KNL-NEXT: vmovd %eax, %xmm7
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $1, %r13d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $3, %r8d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $4, %edx, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $5, %esi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $6, %edi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $7, %r8d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $8, %r9d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $9, %ebx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $11, %r10d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $12, %r11d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $13, %r14d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $6, %ebp, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $7, %ebx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $8, %r11d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $9, %edi, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $10, %r10d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $11, %r9d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $12, %r14d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $13, %r15d, %xmm7, %xmm7
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
-; KNL-NEXT: vpinsrb $14, %r15d, %xmm7, %xmm4
-; KNL-NEXT: vpinsrb $15, %r12d, %xmm4, %xmm4
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm4
+; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## BB#1: ## %A
; SKX-NEXT: kshiftlw $11, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## BB#1: ## %A
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
+; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: movw $-4, %ax
; KNL-NEXT: kmovw %eax, %k1
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; SKX-NEXT: kshiftlb $3, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
}
define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) {
+; KNL-LABEL: test_insertelement_v32i1:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rbp
+; KNL-NEXT: Ltmp0:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: Ltmp1:
+; KNL-NEXT: .cfi_offset %rbp, -16
+; KNL-NEXT: movq %rsp, %rbp
+; KNL-NEXT: Ltmp2:
+; KNL-NEXT: .cfi_def_cfa_register %rbp
+; KNL-NEXT: andq $-32, %rsp
+; KNL-NEXT: subq $32, %rsp
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
+; KNL-NEXT: kshiftlw $14, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kshiftlw $15, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vmovd %ecx, %xmm1
+; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $13, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $12, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $11, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $10, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $9, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $6, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $5, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $4, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $3, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftlw $1, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
+; KNL-NEXT: kshiftlw $14, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kshiftlw $15, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vmovd %ecx, %xmm0
+; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $13, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $12, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $11, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $10, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $9, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $6, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $5, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $4, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $3, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftlw $1, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; KNL-NEXT: sbbl %eax, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
+; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, (%rsp)
+; KNL-NEXT: movl (%rsp), %eax
+; KNL-NEXT: movq %rbp, %rsp
+; KNL-NEXT: popq %rbp
+; KNL-NEXT: retq
+;
; SKX-LABEL: test_insertelement_v32i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k1
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k2
}
define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) {
+; KNL-LABEL: test_iinsertelement_v4i1:
+; KNL: ## BB#0:
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: setb %al
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpextrd $1, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z}
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k2} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,8,2,3,4,5,6,7]
+; KNL-NEXT: vpermt2q %zmm2, %zmm4, %zmm3
+; KNL-NEXT: vpsllq $63, %zmm3, %zmm2
+; KNL-NEXT: vptestmq %zmm2, %zmm2, %k2
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z}
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k1} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,8,3,4,5,6,7]
+; KNL-NEXT: vpermt2q %zmm3, %zmm4, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm2
+; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpextrd $3, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,8,4,5,6,7]
+; KNL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
; SKX-LABEL: test_iinsertelement_v4i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k1
; SKX-NEXT: vpmovm2d %k1, %xmm0
}
define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) {
+; KNL-LABEL: test_iinsertelement_v2i1:
+; KNL: ## BB#0:
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: setb %al
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; KNL-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
; SKX-LABEL: test_iinsertelement_v2i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
; SKX-NEXT: kshiftlw $1, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: sete %al
; SKX-NEXT: addb $3, %al
; SKX-NEXT: kshiftlw $12, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
%t1 = icmp ugt <4 x i32> %a, %b
%t2 = extractelement <4 x i1> %t1, i32 3
; SKX-NEXT: kshiftld $29, %k0, %k0
; SKX-NEXT: kshiftrd $31, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
%t1 = icmp ugt <32 x i8> %a, %b
%t2 = extractelement <32 x i1> %t1, i32 2
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: sete %al
; SKX-NEXT: addb $3, %al
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: kortestw %k0, %k1
; CHECK-NEXT: sete %al
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
ret i32 %res
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
; CHECK-NEXT: kandw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1}
; CHECK-NEXT: kmovw %k2, %ecx
; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
-; CHECK-NEXT: kmovw %k1, %eax
-; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: kmovw %k1, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: andb %dl, %al
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm2
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm1
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: addb %cl, %al
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: addb %cl, %al
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
;
; AVX512-LABEL: select_cmov_i16:
; AVX512: ## BB#0:
+; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmovew %dx, %si
;
; AVX512-LABEL: select_cmov_i32:
; AVX512: ## BB#0:
+; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmovel %edx, %esi
;
; AVX512-LABEL: select_cmov_i64:
; AVX512: ## BB#0:
+; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmoveq %rdx, %rsi
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test1:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test2:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test3:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test4:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
+; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test5:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
+; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; SKX-NEXT: vmovdqa64 %ymm2, %ymm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test6:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2}
+; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1}
+; SKX_32-NEXT: vmovdqa64 %ymm2, %ymm0
+; SKX_32-NEXT: retl
%a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test7:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2}
+; SKX_32-NEXT: vmovdqa64 %ymm1, %ymm2
+; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1}
+; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer
; KNL_32-LABEL: test9:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
-; KNL_32-NEXT: vpbroadcastd .LCPI8_0, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI8_1, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI8_2, %ymm1
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test9:
+; SKX_32: # BB#0: # %entry
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
+; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
; KNL_32-LABEL: test10:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
-; KNL_32-NEXT: vpbroadcastd .LCPI9_0, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI9_1, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI9_2, %ymm1
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test10:
+; SKX_32: # BB#0: # %entry
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
+; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test11:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test12:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test13:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
;
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
-; KNL_64: vpxor %ymm2, %ymm2, %ymm2
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
-; KNL_64-NEXT: # kill
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
-; KNL_32: vpxor %ymm2, %ymm2, %ymm2
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
-; KNL_32-NEXT: # kill
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
;
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
-; KNL_64: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
;
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
-; KNL_32: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_32-NEXT: vpsllvq .LCPI15_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
;
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
;
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
;
; KNL_64-LABEL: test18:
; KNL_64: # BB#0:
-; KNL_64: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
;
; KNL_32-LABEL: test18:
; KNL_32: # BB#0:
-; KNL_32: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
;
; KNL_64-LABEL: test19:
; KNL_64: # BB#0:
-; KNL_64: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
;
; KNL_32-LABEL: test19:
; KNL_32: # BB#0:
-; KNL_32: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI18_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
; KNL_32-NEXT: retl
;
; KNL_64-LABEL: test20:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
;
; KNL_32-LABEL: test20:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
;
; SKX-LABEL: test20:
; SKX: # BB#0:
-; SKX: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
;
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
;
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_32-NEXT: vpsllvq .LCPI20_0, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test21:
; SKX: # BB#0:
-; SKX: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
;
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
-; SKX_32: vpsllq $63, %xmm2, %xmm2
+; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
; SKX_32-NEXT: kshiftrb $6, %k0, %k1
;
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
;
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
;
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
;
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
-; KNL_64: movb $3, %al
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
-; KNL_32: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI23_0, %zmm1, %zmm1
-; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
;
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
;
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
;
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
-; KNL_64: movb $3, %al
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
;
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
-; KNL_32: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI25_0, %zmm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: # kill
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: # kill
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test27:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: movb $3, %cl
+; SKX_32-NEXT: kmovb %ecx, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1}
+; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
%res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
;
; KNL_64-LABEL: test28:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
;
; KNL_32-LABEL: test28:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI27_0, %zmm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI27_1, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test28:
; SKX: # BB#0:
-; SKX: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
;
; SKX_32-LABEL: test28:
; SKX_32: # BB#0:
-; SKX_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovb %eax, %k1
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test29:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: movw $44, %cx
+; SKX_32-NEXT: kmovw %ecx, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; KNL_64-LABEL: test30:
; KNL_64: # BB#0:
; KNL_64-NEXT: andl $1, %edx
+; KNL_64-NEXT: kmovw %edx, %k1
; KNL_64-NEXT: andl $1, %esi
+; KNL_64-NEXT: kmovw %esi, %k2
; KNL_64-NEXT: movl %edi, %eax
; KNL_64-NEXT: andl $1, %eax
+; KNL_64-NEXT: kmovw %eax, %k0
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; KNL_64-NEXT: testb $1, %dil
; KNL_64-NEXT: je .LBB29_2
; KNL_64-NEXT: # BB#1: # %cond.load
-; KNL_64-NEXT: vmovq %xmm1, %rcx
+; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_64-NEXT: .LBB29_2: # %else
-; KNL_64-NEXT: testb %sil, %sil
+; KNL_64-NEXT: kmovw %k2, %eax
+; KNL_64-NEXT: movl %eax, %ecx
+; KNL_64-NEXT: andl $1, %ecx
+; KNL_64-NEXT: testb %cl, %cl
; KNL_64-NEXT: je .LBB29_4
; KNL_64-NEXT: # BB#3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rcx
; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_4: # %else2
+; KNL_64-NEXT: kmovw %k1, %ecx
+; KNL_64-NEXT: movl %ecx, %edx
+; KNL_64-NEXT: andl $1, %edx
; KNL_64-NEXT: testb %dl, %dl
; KNL_64-NEXT: je .LBB29_6
; KNL_64-NEXT: # BB#5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL_64-NEXT: vmovq %xmm1, %rcx
-; KNL_64-NEXT: vpinsrd $2, (%rcx), %xmm0, %xmm0
+; KNL_64-NEXT: vmovq %xmm1, %rdx
+; KNL_64-NEXT: vpinsrd $2, (%rdx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_6: # %else5
-; KNL_64-NEXT: vmovd %eax, %xmm1
-; KNL_64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
-; KNL_64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
+; KNL_64-NEXT: kmovw %k0, %edx
+; KNL_64-NEXT: vmovd %edx, %xmm1
+; KNL_64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_64-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # BB#0:
-; KNL_32-NEXT: pushl %ebx
-; KNL_32-NEXT: .Ltmp0:
-; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: pushl %esi
-; KNL_32-NEXT: .Ltmp1:
-; KNL_32-NEXT: .cfi_def_cfa_offset 12
-; KNL_32-NEXT: .Ltmp2:
-; KNL_32-NEXT: .cfi_offset %esi, -12
-; KNL_32-NEXT: .Ltmp3:
-; KNL_32-NEXT: .cfi_offset %ebx, -8
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: andl $1, %eax
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; KNL_32-NEXT: kmovw %eax, %k1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: andl $1, %eax
+; KNL_32-NEXT: kmovw %eax, %k2
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movl %eax, %ecx
; KNL_32-NEXT: andl $1, %ecx
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; KNL_32-NEXT: movl %ebx, %edx
-; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: kmovw %ecx, %k0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; KNL_32-NEXT: # implicit-def: %XMM0
-; KNL_32-NEXT: testb $1, %bl
+; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB29_2
; KNL_32-NEXT: # BB#1: # %cond.load
-; KNL_32-NEXT: vmovd %xmm1, %esi
+; KNL_32-NEXT: vmovd %xmm1, %eax
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: .LBB29_2: # %else
+; KNL_32-NEXT: kmovw %k2, %eax
+; KNL_32-NEXT: movl %eax, %ecx
+; KNL_32-NEXT: andl $1, %ecx
; KNL_32-NEXT: testb %cl, %cl
; KNL_32-NEXT: je .LBB29_4
; KNL_32-NEXT: # BB#3: # %cond.load1
-; KNL_32-NEXT: vpextrd $1, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
+; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_4: # %else2
-; KNL_32-NEXT: testb %al, %al
+; KNL_32-NEXT: kmovw %k1, %ecx
+; KNL_32-NEXT: movl %ecx, %edx
+; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: testb %dl, %dl
; KNL_32-NEXT: je .LBB29_6
; KNL_32-NEXT: # BB#5: # %cond.load4
-; KNL_32-NEXT: vpextrd $2, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $2, %xmm1, %edx
+; KNL_32-NEXT: vpinsrd $2, (%edx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_6: # %else5
+; KNL_32-NEXT: kmovw %k0, %edx
; KNL_32-NEXT: vmovd %edx, %xmm1
-; KNL_32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; KNL_32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
-; KNL_32-NEXT: popl %esi
-; KNL_32-NEXT: popl %ebx
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: # implicit-def: %XMM0
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: # implicit-def: %XMM1
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_2
; SKX-NEXT: # BB#1: # %cond.load
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX-NEXT: .LBB29_2: # %else
; SKX-NEXT: kshiftlw $14, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_4
; SKX-NEXT: # BB#3: # %cond.load1
-; SKX-NEXT: vpextrq $1, %xmm1, %rax
-; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vpextrq $1, %xmm0, %rax
+; SKX-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm1
; SKX-NEXT: .LBB29_4: # %else2
; SKX-NEXT: kshiftlw $13, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_6
; SKX-NEXT: # BB#5: # %cond.load4
-; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm1
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm1
; SKX-NEXT: .LBB29_6: # %else5
-; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
+; SKX-NEXT: vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT: kmovw %k0, %eax
-; SKX_32-NEXT: # implicit-def: %XMM0
+; SKX_32-NEXT: andl $1, %eax
+; SKX_32-NEXT: # implicit-def: %XMM1
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_2
; SKX_32-NEXT: # BB#1: # %cond.load
-; SKX_32-NEXT: vmovd %xmm1, %eax
-; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX_32-NEXT: vmovd %xmm0, %eax
+; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX_32-NEXT: .LBB29_2: # %else
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_4
; SKX_32-NEXT: # BB#3: # %cond.load1
-; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
-; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
+; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
+; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB29_4: # %else2
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_6
; SKX_32-NEXT: # BB#5: # %cond.load4
-; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
-; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
+; SKX_32-NEXT: vpextrd $2, %xmm0, %eax
+; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB29_6: # %else5
-; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
+; SKX_32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: retl
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp4:
+; KNL_32-NEXT: .Ltmp0:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp5:
+; KNL_32-NEXT: .Ltmp1:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp6:
+; KNL_32-NEXT: .Ltmp2:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp7:
+; KNL_32-NEXT: .Ltmp3:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp8:
+; KNL_32-NEXT: .Ltmp4:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp9:
+; KNL_32-NEXT: .Ltmp5:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp10:
+; KNL_32-NEXT: .Ltmp6:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp11:
+; KNL_32-NEXT: .Ltmp7:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp12:
+; KNL_32-NEXT: .Ltmp8:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp13:
+; KNL_32-NEXT: .Ltmp9:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp14:
+; KNL_32-NEXT: .Ltmp10:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp15:
+; KNL_32-NEXT: .Ltmp11:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: ## implicit-def: %XMM0
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_2
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: kshiftlw $6, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: kshiftlw $5, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: kshiftlw $4, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: kshiftlw $3, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: kshiftlw $2, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: kshiftlw $1, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: LBB50_30: ## %else41
; AVX512F-NEXT: kshiftrw $15, %k1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB50_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_2
; AVX512F-NEXT: ## BB#1: ## %cond.load
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, (%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_34
; AVX512F-NEXT: ## BB#33: ## %cond.load46
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_36
; AVX512F-NEXT: ## BB#35: ## %cond.load49
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_38
; AVX512F-NEXT: ## BB#37: ## %cond.load52
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_40
; AVX512F-NEXT: ## BB#39: ## %cond.load55
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_42
; AVX512F-NEXT: ## BB#41: ## %cond.load58
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_44
; AVX512F-NEXT: ## BB#43: ## %cond.load61
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_46
; AVX512F-NEXT: ## BB#45: ## %cond.load64
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_48
; AVX512F-NEXT: ## BB#47: ## %cond.load67
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_50
; AVX512F-NEXT: ## BB#49: ## %cond.load70
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_52
; AVX512F-NEXT: ## BB#51: ## %cond.load73
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_54
; AVX512F-NEXT: ## BB#53: ## %cond.load76
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_56
; AVX512F-NEXT: ## BB#55: ## %cond.load79
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_58
; AVX512F-NEXT: ## BB#57: ## %cond.load82
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_60
; AVX512F-NEXT: ## BB#59: ## %cond.load85
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_62
; AVX512F-NEXT: ## BB#61: ## %cond.load88
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_64
; AVX512F-NEXT: ## BB#63: ## %cond.load91
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_66
; AVX512F-NEXT: ## BB#65: ## %cond.load94
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_68
; AVX512F-NEXT: ## BB#67: ## %cond.load97
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_70
; AVX512F-NEXT: ## BB#69: ## %cond.load100
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_72
; AVX512F-NEXT: ## BB#71: ## %cond.load103
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_74
; AVX512F-NEXT: ## BB#73: ## %cond.load106
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_76
; AVX512F-NEXT: ## BB#75: ## %cond.load109
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_78
; AVX512F-NEXT: ## BB#77: ## %cond.load112
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_80
; AVX512F-NEXT: ## BB#79: ## %cond.load115
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_82
; AVX512F-NEXT: ## BB#81: ## %cond.load118
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_84
; AVX512F-NEXT: ## BB#83: ## %cond.load121
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_86
; AVX512F-NEXT: ## BB#85: ## %cond.load124
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_88
; AVX512F-NEXT: ## BB#87: ## %cond.load127
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_90
; AVX512F-NEXT: ## BB#89: ## %cond.load130
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_92
; AVX512F-NEXT: ## BB#91: ## %cond.load133
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_94
; AVX512F-NEXT: ## BB#93: ## %cond.load136
; AVX512F-NEXT: vpinsrb $14, 46(%rdi), %xmm1, %xmm3
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_94: ## %else137
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
+; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k5
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_96
; AVX512F-NEXT: ## BB#95: ## %cond.load139
; AVX512F-NEXT: vpinsrb $15, 47(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_96: ## %else140
-; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftlw $15, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_98
; AVX512F-NEXT: ## BB#97: ## %cond.load142
; AVX512F-NEXT: vpinsrb $0, 48(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_98: ## %else143
-; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftlw $14, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_100
; AVX512F-NEXT: ## BB#99: ## %cond.load145
; AVX512F-NEXT: vpinsrb $1, 49(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_100: ## %else146
-; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftlw $13, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_102
; AVX512F-NEXT: ## BB#101: ## %cond.load148
; AVX512F-NEXT: vpinsrb $2, 50(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_102: ## %else149
-; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftlw $12, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_104
; AVX512F-NEXT: ## BB#103: ## %cond.load151
; AVX512F-NEXT: vpinsrb $3, 51(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_104: ## %else152
-; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftlw $11, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_106
; AVX512F-NEXT: ## BB#105: ## %cond.load154
; AVX512F-NEXT: vpinsrb $4, 52(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_106: ## %else155
-; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftlw $10, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_108
; AVX512F-NEXT: ## BB#107: ## %cond.load157
; AVX512F-NEXT: vpinsrb $5, 53(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_108: ## %else158
-; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftlw $9, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_110
; AVX512F-NEXT: ## BB#109: ## %cond.load160
; AVX512F-NEXT: vpinsrb $6, 54(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_110: ## %else161
-; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftlw $8, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_112
; AVX512F-NEXT: ## BB#111: ## %cond.load163
; AVX512F-NEXT: vpinsrb $7, 55(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_112: ## %else164
-; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftlw $7, %k5, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_114
; AVX512F-NEXT: ## BB#113: ## %cond.load166
; AVX512F-NEXT: vpinsrb $8, 56(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_114: ## %else167
-; AVX512F-NEXT: kshiftlw $6, %k1, %k2
-; AVX512F-NEXT: kshiftrw $15, %k2, %k2
+; AVX512F-NEXT: kshiftlw $6, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k2
; AVX512F-NEXT: kmovw %k2, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_116
; AVX512F-NEXT: ## BB#115: ## %cond.load169
; AVX512F-NEXT: vpinsrb $9, 57(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_116: ## %else170
-; AVX512F-NEXT: kshiftlw $5, %k1, %k3
-; AVX512F-NEXT: kshiftrw $15, %k3, %k3
+; AVX512F-NEXT: kshiftlw $5, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k3
; AVX512F-NEXT: kmovw %k3, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_118
; AVX512F-NEXT: ## BB#117: ## %cond.load172
; AVX512F-NEXT: vpinsrb $10, 58(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_118: ## %else173
-; AVX512F-NEXT: kshiftlw $4, %k1, %k4
-; AVX512F-NEXT: kshiftrw $15, %k4, %k4
+; AVX512F-NEXT: kshiftlw $4, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k4
; AVX512F-NEXT: kmovw %k4, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_120
; AVX512F-NEXT: ## BB#119: ## %cond.load175
; AVX512F-NEXT: vpinsrb $11, 59(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_120: ## %else176
-; AVX512F-NEXT: kshiftlw $3, %k1, %k5
-; AVX512F-NEXT: kshiftrw $15, %k5, %k5
-; AVX512F-NEXT: kmovw %k5, %eax
+; AVX512F-NEXT: kshiftlw $3, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k6
+; AVX512F-NEXT: kmovw %k6, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_122
; AVX512F-NEXT: ## BB#121: ## %cond.load178
; AVX512F-NEXT: vpinsrb $12, 60(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_122: ## %else179
-; AVX512F-NEXT: kshiftlw $2, %k1, %k6
-; AVX512F-NEXT: kshiftrw $15, %k6, %k6
-; AVX512F-NEXT: kmovw %k6, %eax
+; AVX512F-NEXT: kshiftlw $2, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k7
+; AVX512F-NEXT: kmovw %k7, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_124
; AVX512F-NEXT: ## BB#123: ## %cond.load181
; AVX512F-NEXT: vpinsrb $13, 61(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_124: ## %else182
-; AVX512F-NEXT: kshiftlw $1, %k1, %k7
-; AVX512F-NEXT: kshiftrw $15, %k7, %k7
-; AVX512F-NEXT: kmovw %k7, %eax
+; AVX512F-NEXT: kshiftlw $1, %k5, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_126
; AVX512F-NEXT: ## BB#125: ## %cond.load184
; AVX512F-NEXT: vpinsrb $14, 62(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_126: ## %else185
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: kshiftrw $15, %k5, %k5
+; AVX512F-NEXT: kmovw %k5, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_128
; AVX512F-NEXT: ## BB#127: ## %cond.load187
; AVX512F-NEXT: vpinsrb $15, 63(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_128: ## %else188
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw (%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw (%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, (%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw %k2, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw %k3, %r12d
; AVX512F-NEXT: kmovw %k4, %r15d
-; AVX512F-NEXT: kmovw %k5, %r14d
-; AVX512F-NEXT: kmovw %k6, %ebx
-; AVX512F-NEXT: kmovw %k7, %r11d
-; AVX512F-NEXT: kmovw %k1, %r10d
+; AVX512F-NEXT: kmovw %k6, %r14d
+; AVX512F-NEXT: kmovw %k7, %ebx
+; AVX512F-NEXT: kmovw %k0, %r11d
+; AVX512F-NEXT: kmovw %k5, %r10d
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw %k0, %r15d
; AVX512F-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
-; AVX512F-NEXT: kmovw %k0, %r14d
+; AVX512F-NEXT: kmovw %k0, %ebp
; AVX512F-NEXT: vpinsrb $13, %ebx, %xmm6, %xmm6
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
; AVX512F-NEXT: kmovw %k0, %ebx
; AVX512F-NEXT: vpinsrb $8, %r13d, %xmm2, %xmm2
; AVX512F-NEXT: vpinsrb $9, %r12d, %xmm2, %xmm2
; AVX512F-NEXT: vpinsrb $10, %r15d, %xmm2, %xmm2
-; AVX512F-NEXT: vpinsrb $11, %r14d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $11, %ebp, %xmm2, %xmm2
; AVX512F-NEXT: vpinsrb $12, %ebx, %xmm2, %xmm2
; AVX512F-NEXT: vpinsrb $13, %r11d, %xmm2, %xmm2
; AVX512F-NEXT: vpinsrb $14, %r10d, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: ## implicit-def: %XMM0
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_2
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB53_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: ## implicit-def: %YMM0
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_2
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: kshiftlw $6, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: kshiftlw $5, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: kshiftlw $4, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: kshiftlw $3, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: kshiftlw $2, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: kshiftlw $1, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: LBB54_30: ## %else41
; AVX512F-NEXT: kshiftrw $15, %k1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB54_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: LBB56_30: ## %else28
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_34
; AVX512F-NEXT: ## BB#33: ## %cond.store31
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_36
; AVX512F-NEXT: ## BB#35: ## %cond.store33
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_38
; AVX512F-NEXT: ## BB#37: ## %cond.store35
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_40
; AVX512F-NEXT: ## BB#39: ## %cond.store37
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_42
; AVX512F-NEXT: ## BB#41: ## %cond.store39
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_44
; AVX512F-NEXT: ## BB#43: ## %cond.store41
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_46
; AVX512F-NEXT: ## BB#45: ## %cond.store43
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_48
; AVX512F-NEXT: ## BB#47: ## %cond.store45
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_50
; AVX512F-NEXT: ## BB#49: ## %cond.store47
; AVX512F-NEXT: kshiftlw $6, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_52
; AVX512F-NEXT: ## BB#51: ## %cond.store49
; AVX512F-NEXT: kshiftlw $5, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_54
; AVX512F-NEXT: ## BB#53: ## %cond.store51
; AVX512F-NEXT: kshiftlw $4, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_56
; AVX512F-NEXT: ## BB#55: ## %cond.store53
; AVX512F-NEXT: kshiftlw $3, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_58
; AVX512F-NEXT: ## BB#57: ## %cond.store55
; AVX512F-NEXT: kshiftlw $2, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_60
; AVX512F-NEXT: ## BB#59: ## %cond.store57
; AVX512F-NEXT: kshiftlw $1, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_62
; AVX512F-NEXT: ## BB#61: ## %cond.store59
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_64
; AVX512F-NEXT: ## BB#63: ## %cond.store61
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_66
; AVX512F-NEXT: ## BB#65: ## %cond.store63
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_68
; AVX512F-NEXT: ## BB#67: ## %cond.store65
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_70
; AVX512F-NEXT: ## BB#69: ## %cond.store67
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_72
; AVX512F-NEXT: ## BB#71: ## %cond.store69
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_74
; AVX512F-NEXT: ## BB#73: ## %cond.store71
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_76
; AVX512F-NEXT: ## BB#75: ## %cond.store73
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_78
; AVX512F-NEXT: ## BB#77: ## %cond.store75
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_80
; AVX512F-NEXT: ## BB#79: ## %cond.store77
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_82
; AVX512F-NEXT: ## BB#81: ## %cond.store79
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_84
; AVX512F-NEXT: ## BB#83: ## %cond.store81
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_86
; AVX512F-NEXT: ## BB#85: ## %cond.store83
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_88
; AVX512F-NEXT: ## BB#87: ## %cond.store85
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_90
; AVX512F-NEXT: ## BB#89: ## %cond.store87
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_92
; AVX512F-NEXT: ## BB#91: ## %cond.store89
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_94
; AVX512F-NEXT: ## BB#93: ## %cond.store91
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_96
; AVX512F-NEXT: ## BB#95: ## %cond.store93
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_98
; AVX512F-NEXT: ## BB#97: ## %cond.store95
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_100
; AVX512F-NEXT: ## BB#99: ## %cond.store97
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_102
; AVX512F-NEXT: ## BB#101: ## %cond.store99
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_104
; AVX512F-NEXT: ## BB#103: ## %cond.store101
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_106
; AVX512F-NEXT: ## BB#105: ## %cond.store103
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_108
; AVX512F-NEXT: ## BB#107: ## %cond.store105
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_110
; AVX512F-NEXT: ## BB#109: ## %cond.store107
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_112
; AVX512F-NEXT: ## BB#111: ## %cond.store109
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_114
; AVX512F-NEXT: ## BB#113: ## %cond.store111
; AVX512F-NEXT: kshiftlw $6, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_116
; AVX512F-NEXT: ## BB#115: ## %cond.store113
; AVX512F-NEXT: kshiftlw $5, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_118
; AVX512F-NEXT: ## BB#117: ## %cond.store115
; AVX512F-NEXT: kshiftlw $4, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_120
; AVX512F-NEXT: ## BB#119: ## %cond.store117
; AVX512F-NEXT: kshiftlw $3, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_122
; AVX512F-NEXT: ## BB#121: ## %cond.store119
; AVX512F-NEXT: kshiftlw $2, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_124
; AVX512F-NEXT: ## BB#123: ## %cond.store121
; AVX512F-NEXT: kshiftlw $1, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_126
; AVX512F-NEXT: ## BB#125: ## %cond.store123
; AVX512F-NEXT: LBB58_126: ## %else124
; AVX512F-NEXT: kshiftrw $15, %k1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB58_128
; AVX512F-NEXT: ## BB#127: ## %cond.store125
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: LBB60_30: ## %else28
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: kmovw %ecx, %k0
+; CHECK-NEXT: movb %al, %dil
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movzbl %al, %edi
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: kmovw %ecx, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: movb %cl, %al
+; CHECK-NEXT: movb %al, %dil
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: movl $-1, %edx
-; CHECK-NEXT: cmovnel %edx, %edi
+; CHECK-NEXT: movl $-1, %ecx
+; CHECK-NEXT: cmovnel %ecx, %edi
; CHECK-NEXT: callq callee2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; Note that the kmovs should really *not* appear in the output, this is an
; artifact of the current poor lowering. This is tracked by PR28175.
+; CHECK-LABEL: @foo64
+; CHECK: kmov
+; CHECK: kmov
+; CHECK: orq $-2, %rax
+; CHECK: ret
define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
-; CHECK-LABEL: foo64:
-; CHECK: # BB#0:
-; CHECK-NEXT: # kill
-; CHECK-NEXT: orq $-2, %rdi
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: retq
br label %bb
bb:
ret i64 %v
}
+; CHECK-LABEL: @foo16
+; CHECK: kmov
+; CHECK: kmov
+; CHECK: orl $65534, %eax
+; CHECK: retq
define i16 @foo16(i1 zeroext %i, i32 %j) #0 {
-; CHECK-LABEL: foo16:
-; CHECK: # BB#0:
-; CHECK-NEXT: orl $65534, %edi # imm = 0xFFFE
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: retq
br label %bb
bb:
; KNL-LABEL: bug27873:
; KNL: ## BB#0:
; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movl $160, %ecx
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: mulq %rcx
+; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: seto %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0