From 87b107dd698fcf0678e65208d670c04cfa570355 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 29 Jun 2018 17:24:07 +0000 Subject: [PATCH] [X86] Limit the number of target specific nodes emitted in LowerShiftParts The important part is the creation of the SHLD/SHRD nodes. The compare and the conditional move can use target independent nodes that can be legalized on their own. This gives some opportunities to trigger the optimizations present in the lowering for those things. And it's just better to limit the number of places we emit target specific nodes. The changed test cases still aren't optimal. Differential Revision: https://reviews.llvm.org/D48619 llvm-svn: 335998 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 ++++++--------- llvm/test/CodeGen/X86/legalize-shift-64.ll | 25 ++++++------------- llvm/test/CodeGen/X86/pr32282.ll | 39 ++++++++++++------------------ 3 files changed, 29 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index dcc9e23..1e386eb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16130,24 +16130,19 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { // values for large shift amounts. 
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, dl, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - AndNode, DAG.getConstant(0, dl, MVT::i8)); + SDValue Cond = DAG.getSetCC(dl, MVT::i8, AndNode, + DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); SDValue Hi, Lo; - SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); - SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; - SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); - Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } else { - Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); - Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); + return DAG.getMergeValues({ Lo, Hi }, dl); } // Try to use a packed vector operation to handle i64 on 32-bit targets when diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll index ba00232..fdb21e0 100644 --- a/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -142,26 +142,15 @@ define i32 @test6() { ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: orl $0, %eax +; CHECK-NEXT: je .LBB5_3 +; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: shldl $32, %eax, %ecx -; CHECK-NEXT: movb $32, %dl -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne .LBB5_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: sete %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: xorl $1, %eax -; 
CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: je .LBB5_5 -; CHECK-NEXT: # %bb.3: # %if.then -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: jmp .LBB5_4 -; CHECK-NEXT: .LBB5_5: # %if.end +; CHECK-NEXT: jmp .LBB5_2 +; CHECK-NEXT: .LBB5_3: # %if.end ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB5_4: # %if.then +; CHECK-NEXT: .LBB5_2: # %if.then ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/pr32282.ll b/llvm/test/CodeGen/X86/pr32282.ll index c2d695c..e5547fc 100644 --- a/llvm/test/CodeGen/X86/pr32282.ll +++ b/llvm/test/CodeGen/X86/pr32282.ll @@ -12,30 +12,23 @@ define void @foo() { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: movl d, %ecx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl d, %eax +; X86-NEXT: notl %eax +; X86-NEXT: movl d+4, %ecx ; X86-NEXT: notl %ecx -; X86-NEXT: movl d+4, %edx -; X86-NEXT: notl %edx -; X86-NEXT: andl $701685459, %edx # imm = 0x29D2DED3 -; X86-NEXT: andl $-564453154, %ecx # imm = 0xDE5B20DE -; X86-NEXT: shrdl $21, %edx, %ecx -; X86-NEXT: shrl $21, %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: testb %al, %al -; X86-NEXT: movl %edx, %esi -; X86-NEXT: cmovnel %eax, %esi -; X86-NEXT: cmovel %ecx, %edx -; X86-NEXT: andl $-2, %edx -; X86-NEXT: addl $7, %edx -; X86-NEXT: adcxl %eax, %esi -; X86-NEXT: pushl %esi +; X86-NEXT: andl $701685459, %ecx # imm = 0x29D2DED3 +; X86-NEXT: andl $-564453154, %eax # imm = 0xDE5B20DE +; X86-NEXT: shrdl $21, %ecx, %eax +; X86-NEXT: shrl $21, %ecx +; X86-NEXT: andl $-2, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: addl $7, %eax +; X86-NEXT: adcxl %edx, %ecx +; X86-NEXT: pushl %ecx ; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %edx +; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $0 ; X86-NEXT: .cfi_adjust_cfa_offset 4 @@ -46,9 +39,7 @@ define 
void @foo() { ; X86-NEXT: .cfi_adjust_cfa_offset -16 ; X86-NEXT: orl %eax, %edx ; X86-NEXT: setne {{[0-9]+}}(%esp) -; X86-NEXT: addl $4, %esp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %esi +; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; -- 2.7.4