From b7deb71ef5e0fd35581c065fa42243a992ff7701 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 8 Feb 2023 17:53:59 +0000 Subject: [PATCH] [DAG] Fold freeze(build_pair(x,y)) -> build_pair(freeze(x),freeze(y)) One of the cleanups necessary for D136529 - another being how we're going to handle moving freeze through multiple result nodes (like uaddo and subcarry) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + .../X86/div-rem-pair-recomposition-signed.ll | 451 +++++++++++---------- .../X86/div-rem-pair-recomposition-unsigned.ll | 397 +++++++++--------- 4 files changed, 427 insertions(+), 425 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7249c63..58f2ba0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14710,7 +14710,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { N0->getNumValues() != 1 || !N0->hasOneUse()) return SDValue(); - bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR; + bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR || + N0.getOpcode() == ISD::BUILD_PAIR; SmallSetVector MaybePoisonOperands; for (SDValue Op : N0->ops()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8292f9c..074b19b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4778,6 +4778,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::ZERO_EXTEND_VECTOR_INREG: case ISD::BITCAST: case ISD::BUILD_VECTOR: + case ISD::BUILD_PAIR: return false; case ISD::ADD: diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index 70258d7..7ce11ad 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -177,18 +177,19 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $152, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: subl $156, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: sarl $31, %ebp -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: movl %esi, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %eax, %ecx @@ -196,71 +197,71 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl %edi, %esi +; X86-NEXT: xorl %ebp, %esi +; X86-NEXT: movl %edi, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %edi, %ebx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi -; X86-NEXT: subl %ebp, %edi -; X86-NEXT: sbbl %ebp, %ebx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: sbbl %ebp, %esi -; X86-NEXT: xorl %eax, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: subl %edi, %ebx +; X86-NEXT: sbbl %edi, %ebp +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: sbbl %edi, %esi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, %edi ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: sete %cl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al ; X86-NEXT: orb %cl, %al -; X86-NEXT: movb %al, (%esp) # 1-byte Spill +; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: bsrl %esi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %ebp, %ecx +; X86-NEXT: bsrl %edi, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx ; X86-NEXT: testl %esi, %esi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: bsrl %ebx, %edx +; X86-NEXT: bsrl %ebp, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: bsrl %edi, %edi -; X86-NEXT: xorl $31, %edi -; X86-NEXT: addl $32, %edi ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovnel %edx, %edi -; X86-NEXT: addl $64, %edi +; X86-NEXT: bsrl %ebx, %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: addl $32, %ebx ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %ebp, %ebp +; X86-NEXT: cmovnel %edx, %ebx +; X86-NEXT: addl $64, %ebx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %ebp -; X86-NEXT: cmovnel %ecx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: bsrl %ebx, %edx +; X86-NEXT: orl %esi, %edi +; X86-NEXT: cmovnel %ecx, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: bsrl %ebp, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: testl %edi, %edi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: bsrl %eax, %esi ; X86-NEXT: xorl $31, %esi ; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -270,52 +271,53 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: addl $64, %edx ; X86-NEXT: movl %ebp, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: subl %edx, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: subl %edx, %ebx ; X86-NEXT: movl $0, %eax ; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: movl $0, %esi +; X86-NEXT: sbbl %esi, %esi ; X86-NEXT: movl $127, %ecx -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %edi, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl %ebx, %ecx ; X86-NEXT: movl $0, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: setb %cl -; X86-NEXT: orb (%esp), %cl # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovnel %esi, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: cmovnel %esi, %ebp -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovnel %esi, %eax -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: cmovnel %ebp, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: cmovnel %ebp, %edi +; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: cmovnel %ebp, %esi +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: jne .LBB4_1 ; X86-NEXT: # %bb.8: # %_udiv-special-cases -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: xorl $127, %edx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl $127, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %esi, %ebp ; X86-NEXT: je .LBB4_9 ; X86-NEXT: # %bb.5: # %udiv-bb1 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) @@ -327,51 +329,50 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: xorb $127, %al ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %esi -; X86-NEXT: movl 144(%esp,%esi), %edx -; X86-NEXT: movl 148(%esp,%esi), %ebx +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 148(%esp,%eax), %edx +; X86-NEXT: movl 152(%esp,%eax), %ebx ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shldl %cl, %edx, %ebx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl -; X86-NEXT: movl 140(%esp,%esi), %eax -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl 144(%esp,%eax), %ebp +; X86-NEXT: movl %ebp, %edi ; X86-NEXT: shrl %edi ; X86-NEXT: shrl %cl, %edi ; X86-NEXT: orl %edx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 136(%esp,%esi), %edx +; X86-NEXT: movl 140(%esp,%eax), %eax ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edx, %eax -; X86-NEXT: shll %cl, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl $1, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %ebp +; X86-NEXT: shll %cl, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl $1, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: adcl $0, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: adcl $0, %ebp ; X86-NEXT: jae .LBB4_2 ; X86-NEXT: # %bb.6: -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: jmp .LBB4_7 ; X86-NEXT: .LBB4_1: -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: movl %esi, %ebp ; X86-NEXT: jmp .LBB4_9 ; X86-NEXT: .LBB4_2: # %udiv-preheader ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl (%esp), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) @@ -381,37 +382,36 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movb %dl, %ch +; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch -; X86-NEXT: movb %dl, %cl -; X86-NEXT: shrb $3, %cl -; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl %cl, %edx -; X86-NEXT: movl 100(%esp,%edx), %esi +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $15, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl 104(%esp,%eax), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 100(%esp,%eax), %ebx ; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: movl 96(%esp,%edx), %edi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %edi, %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 88(%esp,%ebx), %ebp -; X86-NEXT: movl 92(%esp,%ebx), %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shrdl %cl, %esi, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl 92(%esp,%eax), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 96(%esp,%eax), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: notb %cl -; X86-NEXT: addl %edi, %edi -; X86-NEXT: shll %cl, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shrdl %cl, %ebx, %ebp +; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shrdl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -421,174 +421,179 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl $-1, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB4_3: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebx +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: shldl $1, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl $1, %esi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: shldl $1, %ebp, %edx -; X86-NEXT: movl (%esp), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: shldl $1, %edx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %edi -; X86-NEXT: orl %eax, %edi +; X86-NEXT: shldl $1, %ecx, %edx +; X86-NEXT: shldl $1, %edi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %ebp, %edi +; X86-NEXT: orl %ebx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %ecx -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl $1, %ecx, %ebp +; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %esi, %esi -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: sbbl %ebp, %ecx ; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: andl $1, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: subl %ecx, %ebp -; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: subl %ecx, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: sbbl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edi, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $-1, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: adcl $-1, %esi +; X86-NEXT: adcl $-1, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: jne .LBB4_3 ; X86-NEXT: # %bb.4: -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: shldl $1, %edi, %ebx -; X86-NEXT: orl %ecx, %ebx -; X86-NEXT: shldl $1, %eax, %edi -; X86-NEXT: orl %ecx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: addl %esi, %esi -; X86-NEXT: orl %edx, %esi +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: shldl $1, %ebp, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, %ebp +; X86-NEXT: orl %eax, %ebp +; X86-NEXT: addl %edx, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: .LBB4_9: # %udiv-end -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: xorl %ecx, %ebx -; X86-NEXT: xorl %ecx, %edi -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: subl %ecx, %esi -; X86-NEXT: sbbl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ecx, %edi -; X86-NEXT: sbbl %ecx, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: movl %eax, 4(%ecx) -; X86-NEXT: movl %edi, 8(%ecx) -; X86-NEXT: movl %ebx, 12(%ecx) -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl %eax, %ebx +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: xorl %eax, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: movl %ebp, 4(%edx) +; X86-NEXT: movl %edi, 8(%edx) +; X86-NEXT: movl %ebx, 12(%edx) +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %ebp +; X86-NEXT: mull %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %ebp ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: setb %bl +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull %esi +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: setb (%esp) # 1-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: mull %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: imull {{[0-9]+}}(%esp), %edi -; X86-NEXT: addl %edx, %edi -; X86-NEXT: addl %ecx, %edi +; X86-NEXT: imull %esi, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %eax, %esi -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: mull %ebp -; X86-NEXT: addl %edx, %edi -; X86-NEXT: addl %esi, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: imull %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: imull %edx, %ebx +; X86-NEXT: mull %edx +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: adcl %ecx, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: sbbl %ebx, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: movl %ecx, 4(%eax) ; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: addl $152, %esp +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: addl $156, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 76ba286..454f8b7 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -177,114 +177,111 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $136, %esp +; X86-NEXT: subl $132, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: orl %ebp, %eax -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %ebp, %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sete %cl +; X86-NEXT: sete %bl ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al -; X86-NEXT: orb %cl, %al +; X86-NEXT: orb %bl, %al ; X86-NEXT: movb %al, (%esp) # 1-byte Spill -; X86-NEXT: bsrl %ebp, %edx +; X86-NEXT: bsrl %esi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: bsrl %eax, %ecx +; X86-NEXT: bsrl %ebp, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %ebp, %ebp +; X86-NEXT: testl %esi, %esi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: bsrl %ebx, %edx +; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %edi, %edi -; X86-NEXT: xorl $31, %edi -; X86-NEXT: addl $32, %edi -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovnel %edx, %edi -; X86-NEXT: addl $64, %edi -; X86-NEXT: movl %eax, %edx -; X86-NEXT: orl %ebp, %edx -; X86-NEXT: cmovnel %ecx, %edi +; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl $31, %eax +; X86-NEXT: addl $32, %eax +; X86-NEXT: testl %edi, %edi +; X86-NEXT: cmovnel %edx, %eax +; X86-NEXT: addl $64, %eax +; X86-NEXT: orl %esi, %ebp +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: bsrl %ebp, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %esi, %ecx -; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: bsrl %ebx, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx ; X86-NEXT: testl %ebp, %ebp ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: bsrl %eax, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: bsrl %edi, %esi ; X86-NEXT: xorl $31, %esi ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: addl $32, %edx -; X86-NEXT: testl %eax, %eax +; X86-NEXT: testl %edi, %edi ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: addl $64, %edx ; X86-NEXT: movl %ebx, %esi ; X86-NEXT: orl %ebp, %esi ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: subl %edx, %edi -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %ebp, %ebp +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subl %edx, %eax ; X86-NEXT: movl $0, %esi ; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: movl $127, %ecx -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %edi, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ebp, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %esi, %ecx -; X86-NEXT: movl $0, %ecx +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %edi, %edi +; X86-NEXT: movl $127, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %ecx -; X86-NEXT: setb %cl -; X86-NEXT: orb (%esp), %cl # 1-byte Folded Reload -; X86-NEXT: cmovnel %ebx, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovnel %ebx, %esi +; X86-NEXT: cmpl %eax, %edx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: setb %dl +; X86-NEXT: orb (%esp), %dl # 1-byte Folded Reload +; X86-NEXT: cmovnel %ecx, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovnel %ebx, %edi -; X86-NEXT: cmovel {{[0-9]+}}(%esp), %ebx -; X86-NEXT: jne .LBB4_1 -; X86-NEXT: # %bb.8: # %_udiv-special-cases +; X86-NEXT: cmovnel %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: jne .LBB4_8 +; X86-NEXT: # %bb.1: # %_udiv-special-cases +; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: xorl $127, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ebx, %ecx ; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: je .LBB4_9 -; X86-NEXT: # %bb.5: # %udiv-bb1 -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB4_8 +; X86-NEXT: # %bb.2: # %udiv-bb1 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -299,49 +296,44 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: andb $15, %al ; X86-NEXT: negb %al ; X86-NEXT: movsbl %al, %eax -; X86-NEXT: movl 128(%esp,%eax), %edx -; X86-NEXT: movl 132(%esp,%eax), %esi +; X86-NEXT: movl 124(%esp,%eax), %edx +; X86-NEXT: movl 128(%esp,%eax), %edi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl -; X86-NEXT: movl 124(%esp,%eax), %edi -; X86-NEXT: movl %edi, %esi -; X86-NEXT: shrl %esi -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 120(%esp,%eax), %ebx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shrl %edi +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl 116(%esp,%eax), %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %ebx, %edi -; X86-NEXT: shll %cl, %ebx +; X86-NEXT: shldl %cl, %edx, %ebx +; X86-NEXT: shll %cl, %edx ; X86-NEXT: addl $1, %ebp ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: adcl $0, %eax +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: jae .LBB4_2 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: adcl $0, %eax +; X86-NEXT: jae .LBB4_3 ; X86-NEXT: # %bb.6: ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: jmp .LBB4_7 -; X86-NEXT: .LBB4_1: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: jmp .LBB4_9 -; X86-NEXT: .LBB4_2: # %udiv-preheader -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: .LBB4_3: # %udiv-preheader +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -355,16 +347,19 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 84(%esp,%eax), %esi -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 80(%esp,%eax), %esi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 80(%esp,%eax), %edi -; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl 76(%esp,%eax), %edi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %esi, %ebx +; X86-NEXT: shrdl %cl, %esi, %edx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 68(%esp,%eax), %edx ; X86-NEXT: movl 72(%esp,%eax), %ebp -; X86-NEXT: movl 76(%esp,%eax), %edx -; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: notb %cl ; X86-NEXT: addl %edi, %edi @@ -373,9 +368,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shrdl %cl, %edx, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: shrdl %cl, %ebp, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -388,168 +383,168 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB4_3: # %udiv-do-while +; X86-NEXT: .LBB4_4: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: shldl $1, %ebx, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebx -; X86-NEXT: shldl $1, %ecx, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl $1, %edi, %ebx +; X86-NEXT: shldl $1, %ebp, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %esi, %ebp +; X86-NEXT: orl %eax, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %ecx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: shldl $1, %eax, %ecx -; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %esi ; X86-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: andl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: subl %ecx, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: subl %ecx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: sbbl %eax, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload -; X86-NEXT: sbbl %ebp, %ebx -; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: sbbl %eax, (%esp) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx +; X86-NEXT: adcl $-1, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: adcl $-1, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: adcl $-1, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: orl %esi, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebp, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.4: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: movl %ecx, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: jne .LBB4_4 +; X86-NEXT: # %bb.5: +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: shldl $1, %edi, %esi -; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: shldl $1, %edi, %ebp +; X86-NEXT: orl %eax, %ebp ; X86-NEXT: shldl $1, %ebx, %edi ; X86-NEXT: orl %eax, %edi -; X86-NEXT: addl %ebx, %ebx -; X86-NEXT: orl %ecx, %ebx -; X86-NEXT: .LBB4_9: # %udiv-end -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebx, (%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: shldl $1, %edx, %ebx +; X86-NEXT: orl %eax, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %edi -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: imull %eax, %esi +; X86-NEXT: addl %edx, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: .LBB4_8: # %udiv-end +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %ebp, 12(%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: imull %ecx, %esi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %ebx, %ebp -; X86-NEXT: addl %edx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: imull %ecx, %ebx +; X86-NEXT: addl %edx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edi -; X86-NEXT: imull %esi, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: imull %ebx, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebp, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: imull %esi, %ebp +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull %eax, %edi +; X86-NEXT: addl %ebp, %edi +; X86-NEXT: addl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: addl %edi, %esi ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %ecx, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ecx, %ebp ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebx, %eax +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: subl (%esp), %ebp # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl %esi, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: subl (%esp), %ebx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, (%eax) -; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: addl $136, %esp +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: addl $132, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -- 2.7.4