For i32 and i64 cases, X86ISD::SHLD/SHRD are close enough to ISD::FSHL/FSHR that we can use them directly, we just need to account for the operand commutation for SHRD.
The i16 SHLD/SHRD case is annoying as the shift amount is modulo-32 (vs funnel shift modulo-16), so I've added X86ISD::FSHL/FSHR equivalents, which match the generic implementation in all other respects.
Something I'm slightly concerned with is that ISD::FSHL/FSHR legality is controlled by the Subtarget.isSHLDSlow() feature flag - we don't normally use non-ISA features for this but it allows the DAG combines to continue to operate after legalization in a lot more cases.
The X86 *bits.ll changes are all affected by the same issue - we now have a "FSHR(-1,-1,amt) -> ROTR(-1,amt) -> (-1)" simplification that reduces the dependencies enough for the branch fall-through code to end up laid out differently.
Differential Revision: https://reviews.llvm.org/D75748
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ // For slow shld targets we only lower for code size.
+ LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
+
setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , Custom);
+ setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
- if (IsFSHR)
- std::swap(Op0, Op1);
-
// i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
- if (VT == MVT::i16)
+ if (VT == MVT::i16) {
Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
DAG.getConstant(15, DL, Amt.getValueType()));
+ unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+ return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+ }
- unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
- return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+ return Op;
}
// Try to use a packed vector operation to handle i64 on 32-bit targets when
#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
NODE_NAME_CASE(BSF)
NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(SHLD)
- NODE_NAME_CASE(SHRD)
+ NODE_NAME_CASE(FSHL)
+ NODE_NAME_CASE(FSHR)
NODE_NAME_CASE(FAND)
NODE_NAME_CASE(FANDN)
NODE_NAME_CASE(FOR)
/// Bit scan reverse.
BSR,
- /// Double shift instructions. These correspond to
- /// X86::SHLDxx and X86::SHRDxx instructions.
- SHLD,
- SHRD,
+ /// X86 funnel/double shift i16 instructions. These correspond to
+ /// X86::SHLDW and X86::SHRDW instructions which have different amt
+ /// modulo rules to generic funnel shifts.
+ /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
+ FSHL,
+ FSHR,
/// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
defm : MaskedRotateAmountPats<rotl, "ROL">;
defm : MaskedRotateAmountPats<rotr, "ROR">;
-// Double shift amount is implicitly masked.
-multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
- // (shift x (and y, 31)) ==> (shift x, y)
- def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
- def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
-
- // (shift x (and y, 63)) ==> (shift x, y)
- def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
-}
-
-defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
-defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
+// Double "funnel" shift amount is implicitly masked.
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
+def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+// (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
let Predicates = [HasBMI2] in {
let AddedComplexity = 1 in {
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
-def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
-def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
+def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize16;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
TB, OpSize16;
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
TB, OpSize32;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
TB, OpSize32;
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
TB;
def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
TB;
} // SchedRW
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1,
(i8 imm:$src3)))]>,
TB;
} // SchedRW
let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize16;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize16;
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB, OpSize32;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB, OpSize32;
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize32;
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL),
+ addr:$dst)]>, TB;
} // SchedRW
let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
} // SchedRW
define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c0:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB13_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB13_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB13_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB13_3
+; X86-FALLBACK0-NEXT: .LBB13_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB13_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c0:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c1_indexzext:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB14_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB14_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB14_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB14_3
+; X86-FALLBACK0-NEXT: .LBB14_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB14_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c1_indexzext:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c2_load:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB15_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %edi
+; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB15_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB15_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %edi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB15_3
+; X86-FALLBACK0-NEXT: .LBB15_1:
+; X86-FALLBACK0-NEXT: movl %edi, %eax
+; X86-FALLBACK0-NEXT: .LBB15_3:
; X86-FALLBACK0-NEXT: andl (%esi), %eax
; X86-FALLBACK0-NEXT: andl 4(%esi), %edx
; X86-FALLBACK0-NEXT: popl %esi
+; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c2_load:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %edi
-; X86-FALLBACK1-NEXT: shrl %cl, %edi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK1-NEXT: cmovel %edi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %edi, %eax
; X86-FALLBACK1-NEXT: andl (%esi), %eax
; X86-FALLBACK1-NEXT: andl 4(%esi), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %edi
-; X86-FALLBACK2-NEXT: shrl %cl, %edi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK2-NEXT: cmovel %edi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %edi, %eax
; X86-FALLBACK2-NEXT: andl (%esi), %eax
; X86-FALLBACK2-NEXT: andl 4(%esi), %edx
; X86-FALLBACK2-NEXT: popl %esi
;
; X86-BMI2-LABEL: clear_highbits64_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %edi, %eax
-; X86-BMI2-NEXT: cmovel %edi, %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: andl 4(%esi), %edx
+; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB16_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %edi
+; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB16_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB16_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %edi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB16_3
+; X86-FALLBACK0-NEXT: .LBB16_1:
+; X86-FALLBACK0-NEXT: movl %edi, %eax
+; X86-FALLBACK0-NEXT: .LBB16_3:
; X86-FALLBACK0-NEXT: andl (%esi), %eax
; X86-FALLBACK0-NEXT: andl 4(%esi), %edx
; X86-FALLBACK0-NEXT: popl %esi
+; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %edi
-; X86-FALLBACK1-NEXT: shrl %cl, %edi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK1-NEXT: cmovel %edi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %edi, %eax
; X86-FALLBACK1-NEXT: andl (%esi), %eax
; X86-FALLBACK1-NEXT: andl 4(%esi), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %edi
-; X86-FALLBACK2-NEXT: shrl %cl, %edi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK2-NEXT: cmovel %edi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %edi, %eax
; X86-FALLBACK2-NEXT: andl (%esi), %eax
; X86-FALLBACK2-NEXT: andl 4(%esi), %edx
; X86-FALLBACK2-NEXT: popl %esi
;
; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %edi, %eax
-; X86-BMI2-NEXT: cmovel %edi, %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: andl 4(%esi), %edx
+; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB17_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB17_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB17_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB17_3
+; X86-FALLBACK0-NEXT: .LBB17_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB17_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
; X86-FALLBACK0-NEXT: movl $-1, %esi
; X86-FALLBACK0-NEXT: movl $-1, %edi
; X86-FALLBACK0-NEXT: shrl %cl, %edi
-; X86-FALLBACK0-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK0-NEXT: testb $32, %cl
; X86-FALLBACK0-NEXT: je .LBB19_2
; X86-FALLBACK0-NEXT: # %bb.1:
; X86-FALLBACK1-NEXT: movl $-1, %esi
; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: shrl %cl, %eax
-; X86-FALLBACK1-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK1-NEXT: xorl %edi, %edi
; X86-FALLBACK1-NEXT: testb $32, %cl
; X86-FALLBACK1-NEXT: cmovnel %eax, %esi
; X86-FALLBACK2-NEXT: movl $-1, %esi
; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: shrl %cl, %eax
-; X86-FALLBACK2-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK2-NEXT: xorl %edi, %edi
; X86-FALLBACK2-NEXT: testb $32, %cl
; X86-FALLBACK2-NEXT: cmovnel %eax, %esi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %esi
-; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT: shrdl %cl, %esi, %esi
+; X86-BMI2-NEXT: shrxl %eax, %esi, %ecx
; X86-BMI2-NEXT: xorl %edi, %edi
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %eax, %esi
-; X86-BMI2-NEXT: cmovel %eax, %edi
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: cmovnel %ecx, %esi
+; X86-BMI2-NEXT: cmovel %ecx, %edi
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB13_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB13_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB13_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB13_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c0:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB14_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB14_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB14_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB14_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB15_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB15_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB15_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB15_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c2_load:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB16_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB16_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB16_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB16_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB17_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB17_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB17_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB17_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB31_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB31_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic0:
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB31_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB31_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB32_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB32_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB32_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB32_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB33_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB33_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb $64, %cl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB33_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB33_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB34_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB34_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb $64, %cl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB34_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB34_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB35_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB35_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB35_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB35_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-NOBMI2-NEXT: pushl %eax
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %esi
-; X86-NOBMI2-NEXT: movl $-1, %edi
-; X86-NOBMI2-NEXT: shll %cl, %edi
-; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi
-; X86-NOBMI2-NEXT: testb $32, %cl
-; X86-NOBMI2-NEXT: je .LBB37_2
-; X86-NOBMI2-NEXT: # %bb.1:
-; X86-NOBMI2-NEXT: movl %edi, %esi
+; X86-NOBMI2-NEXT: movl $-1, %eax
+; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: xorl %edi, %edi
-; X86-NOBMI2-NEXT: .LBB37_2:
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB37_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: jmp .LBB37_3
+; X86-NOBMI2-NEXT: .LBB37_1:
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB37_3:
; X86-NOBMI2-NEXT: subl $8, %esp
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: pushl %edi
; X86-NOBMI2-NEXT: calll use64
; X86-NOBMI2-NEXT: addl $16, %esp
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movl %edi, %eax
; X86-NOBMI2-NEXT: movl %esi, %edx
; X86-NOBMI2-NEXT: addl $4, %esp
; X86-BMI2-NEXT: pushl %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %esi
-; X86-BMI2-NEXT: shlxl %ecx, %esi, %edi
-; X86-BMI2-NEXT: shldl %cl, %esi, %esi
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: je .LBB37_2
-; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %edi, %esi
+; X86-BMI2-NEXT: shlxl %ecx, %esi, %eax
; X86-BMI2-NEXT: xorl %edi, %edi
-; X86-BMI2-NEXT: .LBB37_2:
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: jne .LBB37_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: jmp .LBB37_3
+; X86-BMI2-NEXT: .LBB37_1:
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB37_3:
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: calll use64
; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %edi, %eax
; X86-BMI2-NEXT: movl %esi, %edx
; X86-BMI2-NEXT: addl $4, %esp
define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b0:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB25_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB25_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB25_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB25_4:
+; X86-NOBMI-NEXT: jne .LBB25_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB25_5
+; X86-NOBMI-NEXT: .LBB25_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB25_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b0:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB25_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-LABEL: bextr64_b0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB25_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB25_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB25_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB25_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b1_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB26_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB26_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB26_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB26_4:
+; X86-NOBMI-NEXT: jne .LBB26_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB26_5
+; X86-NOBMI-NEXT: .LBB26_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB26_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB26_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB26_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB26_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB26_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB26_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b2_load:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB27_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB27_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB27_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB27_4:
+; X86-NOBMI-NEXT: jne .LBB27_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB27_5
+; X86-NOBMI-NEXT: .LBB27_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB27_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b2_load:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB27_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-LABEL: bextr64_b2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi
+; X86-BMI1BMI2-NEXT: movl (%edx), %eax
+; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB27_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB27_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB27_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB27_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB28_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB28_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB28_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB28_4:
+; X86-NOBMI-NEXT: jne .LBB28_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB28_5
+; X86-NOBMI-NEXT: .LBB28_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB28_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB28_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi
+; X86-BMI1BMI2-NEXT: movl (%edx), %eax
+; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB28_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB28_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB28_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB28_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b4_commutative:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax
+; X86-NOBMI-NEXT: xorl %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB29_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB29_2:
; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %esi
-; X86-NOBMI-NEXT: shldl %cl, %edi, %edi
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB29_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %esi, %edi
-; X86-NOBMI-NEXT: xorl %esi, %esi
-; X86-NOBMI-NEXT: .LBB29_4:
+; X86-NOBMI-NEXT: jne .LBB29_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %esi
+; X86-NOBMI-NEXT: jmp .LBB29_5
+; X86-NOBMI-NEXT: .LBB29_3:
+; X86-NOBMI-NEXT: movl %ebx, %edi
+; X86-NOBMI-NEXT: .LBB29_5:
; X86-NOBMI-NEXT: notl %edi
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %esi
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB29_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-LABEL: bextr64_b4_commutative:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB29_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB29_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB29_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB29_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %esi, %ebp
-; X86-NOBMI-NEXT: movl %eax, %ecx
+; X86-NOBMI-NEXT: movb %al, %cl
; X86-NOBMI-NEXT: shrl %cl, %ebp
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %ebx
+; X86-NOBMI-NEXT: shrdl %cl, %esi, %edx
+; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %al
; X86-NOBMI-NEXT: je .LBB30_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %ebp, %ebx
+; X86-NOBMI-NEXT: movl %ebp, %edx
; X86-NOBMI-NEXT: xorl %ebp, %ebp
; X86-NOBMI-NEXT: .LBB30_2:
-; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: movl %edx, %ecx
-; X86-NOBMI-NEXT: shll %cl, %edi
-; X86-NOBMI-NEXT: shldl %cl, %esi, %esi
-; X86-NOBMI-NEXT: testb $32, %dl
-; X86-NOBMI-NEXT: je .LBB30_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %edi, %esi
-; X86-NOBMI-NEXT: xorl %edi, %edi
-; X86-NOBMI-NEXT: .LBB30_4:
-; X86-NOBMI-NEXT: notl %esi
-; X86-NOBMI-NEXT: andl %ebp, %esi
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movb %ch, %cl
+; X86-NOBMI-NEXT: shll %cl, %esi
+; X86-NOBMI-NEXT: testb $32, %ch
+; X86-NOBMI-NEXT: jne .LBB30_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %esi, %ebx
+; X86-NOBMI-NEXT: jmp .LBB30_5
+; X86-NOBMI-NEXT: .LBB30_3:
+; X86-NOBMI-NEXT: movl %esi, %edi
+; X86-NOBMI-NEXT: .LBB30_5:
; X86-NOBMI-NEXT: notl %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: andl %ebp, %edi
+; X86-NOBMI-NEXT: notl %ebx
+; X86-NOBMI-NEXT: andl %edx, %ebx
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %edi, %eax
-; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: movl %edi, %edx
; X86-NOBMI-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl %edx, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebp
-; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %dl
; X86-BMI1NOTBM-NEXT: je .LBB30_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: subl $12, %esp
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB30_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edi
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB30_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %ebp
-; X86-BMI1BMI2-NEXT: shlxl %edx, %ebp, %ebx
-; X86-BMI1BMI2-NEXT: movl %edx, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %ebp
-; X86-BMI1BMI2-NEXT: testb $32, %dl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %edi
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB30_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edi, %esi
+; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB30_4:
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebp, %esi
-; X86-BMI1BMI2-NEXT: andnl %edi, %ebx, %edi
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %esi
+; X86-BMI1BMI2-NEXT: andnl %eax, %edi, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %ebp
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB41_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB41_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB41_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB41_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB41_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB42_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB42_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB42_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB42_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB42_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB43_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB43_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB43_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB43_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB43_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB44_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB44_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB44_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB44_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB44_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB45_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB45_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB45_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB45_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB45_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: shrl %cl, %ebp
-; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB46_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB46_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB46_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: movl $-1, %ebp
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB46_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
; X86-NOBMI-NEXT: .LBB47_2:
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB47_4
; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: .LBB47_4:
; X86-NOBMI-NEXT: andl %edx, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: .LBB47_2:
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB47_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: .LBB47_4:
; X86-BMI1NOTBM-NEXT: andl %edx, %eax
; X86-BMI1NOTBM-NEXT: popl %esi
;
; X86-BMI1BMI2-LABEL: bextr64_32_c0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: .LBB47_2:
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB47_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB47_4:
; X86-BMI1BMI2-NEXT: andl %edx, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bextr64_32_c0:
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB20_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB20_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB20_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB20_3
+; X86-NOBMI-NEXT: .LBB20_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB20_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b0:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB20_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB20_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB20_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB20_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b0:
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB21_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB21_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB21_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB21_3
+; X86-NOBMI-NEXT: .LBB21_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB21_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB21_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB21_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB21_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB21_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b2_load:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB22_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %edi
+; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB22_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB22_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: jmp .LBB22_3
+; X86-NOBMI-NEXT: .LBB22_1:
+; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: .LBB22_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: andl (%esi), %eax
+; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB22_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %edx
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %esi
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB22_2:
-; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax
+; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b2_load:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB22_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edx
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %edx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB22_2:
-; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b2_load:
define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB23_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %edi
+; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB23_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB23_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: jmp .LBB23_3
+; X86-NOBMI-NEXT: .LBB23_1:
+; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: .LBB23_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: andl (%esi), %eax
+; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB23_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %edx
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %esi
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB23_2:
-; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax
+; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB23_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edx
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %edx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB23_2:
-; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB24_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB24_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB24_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB24_3
+; X86-NOBMI-NEXT: .LBB24_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB24_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB24_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB24_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB24_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB24_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB34_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB34_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB34_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB34_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB35_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB35_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB35_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB35_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB36_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB36_2:
-; X86-NOBMI-NEXT: movl (%edx), %esi
-; X86-NOBMI-NEXT: andl %eax, %esi
-; X86-NOBMI-NEXT: movl 4(%edx), %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: movl 4(%edx), %esi
+; X86-NOBMI-NEXT: andl %ebx, %esi
+; X86-NOBMI-NEXT: movl (%edx), %edi
+; X86-NOBMI-NEXT: andl %eax, %edi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB36_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: .LBB36_2:
-; X86-BMI1NOTBM-NEXT: movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT: andl %eax, %esi
-; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT: andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi
+; X86-BMI1NOTBM-NEXT: andl %ebx, %esi
+; X86-BMI1NOTBM-NEXT: movl (%edx), %edi
+; X86-BMI1NOTBM-NEXT: andl %eax, %edi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %eax
; X86-BMI1NOTBM-NEXT: calll use64
; X86-BMI1NOTBM-NEXT: addl $16, %esp
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: movl %edi, %edx
+; X86-BMI1NOTBM-NEXT: movl %edi, %eax
+; X86-BMI1NOTBM-NEXT: movl %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb $64, %bl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB36_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %ebx, %eax
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edx, %ecx
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB36_2:
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: andl %eax, %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: andl %ebx, %edi
+; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
+; X86-BMI1BMI2-NEXT: andl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl (%eax), %edi
+; X86-BMI1BMI2-NEXT: andl %ecx, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %edx
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB37_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB37_2:
-; X86-NOBMI-NEXT: movl (%edx), %esi
-; X86-NOBMI-NEXT: andl %eax, %esi
-; X86-NOBMI-NEXT: movl 4(%edx), %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: movl 4(%edx), %esi
+; X86-NOBMI-NEXT: andl %ebx, %esi
+; X86-NOBMI-NEXT: movl (%edx), %edi
+; X86-NOBMI-NEXT: andl %eax, %edi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB37_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: .LBB37_2:
-; X86-BMI1NOTBM-NEXT: movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT: andl %eax, %esi
-; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT: andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi
+; X86-BMI1NOTBM-NEXT: andl %ebx, %esi
+; X86-BMI1NOTBM-NEXT: movl (%edx), %edi
+; X86-BMI1NOTBM-NEXT: andl %eax, %edi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %eax
; X86-BMI1NOTBM-NEXT: calll use64
; X86-BMI1NOTBM-NEXT: addl $16, %esp
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: movl %edi, %edx
+; X86-BMI1NOTBM-NEXT: movl %edi, %eax
+; X86-BMI1NOTBM-NEXT: movl %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb $64, %bl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB37_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %ebx, %eax
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edx, %ecx
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB37_2:
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: andl %eax, %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: andl %ebx, %edi
+; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
+; X86-BMI1BMI2-NEXT: andl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl (%eax), %edi
+; X86-BMI1BMI2-NEXT: andl %ecx, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %edx
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB38_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB38_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB38_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB38_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
-; X86-NOBMI-NEXT: shrdl %cl, %edx, %edx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB39_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %edx, %eax
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: .LBB39_2:
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB39_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %edx, %eax
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: .LBB39_2:
; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB39_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB39_2:
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; X86-FAST-LABEL: combine_fshl_load_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl 12(%ecx), %eax
-; X86-FAST-NEXT: movl 16(%ecx), %esi
-; X86-FAST-NEXT: movl 20(%ecx), %edx
-; X86-FAST-NEXT: shldl $24, %esi, %edx
-; X86-FAST-NEXT: shrdl $8, %esi, %eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movl 13(%ecx), %eax
+; X86-FAST-NEXT: movl 17(%ecx), %edx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: combine_fshl_load_i64:
define i64 @combine_fshr_load_i64(i64* %p) nounwind {
; X86-FAST-LABEL: combine_fshr_load_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movzbl 11(%eax), %ecx
-; X86-FAST-NEXT: movl 12(%eax), %esi
-; X86-FAST-NEXT: movl 16(%eax), %edx
-; X86-FAST-NEXT: shldl $8, %esi, %edx
-; X86-FAST-NEXT: movl %esi, %eax
-; X86-FAST-NEXT: shll $8, %eax
-; X86-FAST-NEXT: orl %ecx, %eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl 11(%ecx), %eax
+; X86-FAST-NEXT: movl 15(%ecx), %edx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: combine_fshr_load_i64:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: shrdl $8, %edx, %eax
; X32-NEXT: shrl $8, %edx
; X32-NEXT: incl %edx
; X32-NEXT: shrdl $8, %edx, %eax
; CHECK-LABEL: int87:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rax
-; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rdx
-; CHECK-NEXT: movzbl %sil, %ecx
-; CHECK-NEXT: shll $6, %ecx
+; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rcx
+; CHECK-NEXT: movzbl %sil, %edx
+; CHECK-NEXT: shll $6, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %for.cond
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rdx, %rsi
-; CHECK-NEXT: shrdq %cl, %rax, %rsi
-; CHECK-NEXT: testb $64, %cl
+; CHECK-NEXT: testb $64, %dl
+; CHECK-NEXT: movq %rcx, %rsi
; CHECK-NEXT: cmovneq %rax, %rsi
; CHECK-NEXT: orl $0, %esi
; CHECK-NEXT: je .LBB0_1