From 56c104f1049d6fce39aa671aa9d019bb5b14cdcb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Jul 2018 23:44:17 +0000 Subject: [PATCH] [X86] Use a two lea sequence for multiply by 37, 41, and 73. These fit a pattern used by 11, 21, and 19. llvm-svn: 337871 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++ llvm/test/CodeGen/X86/funnel-shift.ll | 74 ++++++++++++++++--------------- llvm/test/CodeGen/X86/mul-constant-i16.ll | 24 +++++++--- llvm/test/CodeGen/X86/mul-constant-i32.ll | 36 +++++++++++---- llvm/test/CodeGen/X86/mul-constant-i64.ll | 30 +++++++++---- 5 files changed, 114 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 352e817..c1fb981 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33716,6 +33716,9 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, case 21: // mul x, 21 => add ((shl (mul x, 5), 2), x) return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); + case 41: + // mul x, 41 => add ((shl (mul x, 5), 3), x) + return combineMulShlAddOrSub(5, 3, /*isAdd*/ true); case 22: // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), @@ -33723,6 +33726,12 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, case 19: // mul x, 19 => add ((shl (mul x, 9), 1), x) return combineMulShlAddOrSub(9, 1, /*isAdd*/ true); + case 37: + // mul x, 37 => add ((shl (mul x, 9), 2), x) + return combineMulShlAddOrSub(9, 2, /*isAdd*/ true); + case 73: + // mul x, 73 => add ((shl (mul x, 9), 3), x) + return combineMulShlAddOrSub(9, 3, /*isAdd*/ true); case 13: // mul x, 13 => add ((shl (mul x, 3), 2), x) return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll index 87f4c3d..d4b3c0d 100644 --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -127,31 +127,33 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X64-AVX2-LABEL: fshl_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: pushq %rbx -; X64-AVX2-NEXT: movq %rdx, %r9 -; X64-AVX2-NEXT: movabsq $137438953471, %r10 # imm = 0x1FFFFFFFFF -; X64-AVX2-NEXT: andq %r10, %rsi -; X64-AVX2-NEXT: movl $37, %r8d -; X64-AVX2-NEXT: subq %rdx, %r8 -; X64-AVX2-NEXT: andq %r10, %r9 +; X64-AVX2-NEXT: movq %rdx, %r10 +; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: andq %r8, %rsi +; X64-AVX2-NEXT: movl $37, %r9d +; X64-AVX2-NEXT: subq %rdx, %r9 +; X64-AVX2-NEXT: andq %r8, %r10 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %r11 # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %r9, %rax +; X64-AVX2-NEXT: movq %r10, %rax ; X64-AVX2-NEXT: mulq %r11 ; X64-AVX2-NEXT: shrq $5, %rdx -; X64-AVX2-NEXT: imulq $37, %rdx, %rax -; X64-AVX2-NEXT: subq %rax, %r9 +; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax +; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax +; X64-AVX2-NEXT: subq %rax, %r10 ; X64-AVX2-NEXT: movq %rdi, %rbx -; X64-AVX2-NEXT: movl %r9d, %ecx +; X64-AVX2-NEXT: movl %r10d, %ecx ; X64-AVX2-NEXT: shlq %cl, %rbx +; X64-AVX2-NEXT: andq %r9, %r8 ; X64-AVX2-NEXT: movq %r8, %rax -; X64-AVX2-NEXT: andq %r10, %rax ; X64-AVX2-NEXT: mulq %r11 -; X64-AVX2-NEXT: shrl $5, %edx -; X64-AVX2-NEXT: imull $37, %edx, %eax -; X64-AVX2-NEXT: subl %eax, %r8d -; X64-AVX2-NEXT: movl %r8d, %ecx +; X64-AVX2-NEXT: shrq $5, %rdx +; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax +; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax +; X64-AVX2-NEXT: subl %eax, %r9d +; X64-AVX2-NEXT: movl %r9d, %ecx ; X64-AVX2-NEXT: shrq %cl, %rsi ; X64-AVX2-NEXT: orq %rbx, %rsi -; X64-AVX2-NEXT: testq %r9, %r9 +; X64-AVX2-NEXT: testq %r10, %r10 ; X64-AVX2-NEXT: cmoveq %rdi, %rsi ; X64-AVX2-NEXT: movq %rsi, %rax ; X64-AVX2-NEXT: popq %rbx @@ -359,31 +361,33 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X64-AVX2-LABEL: fshr_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: pushq %rbx -; X64-AVX2-NEXT: movq %rdx, %r9 -; X64-AVX2-NEXT: movabsq $137438953471, %r11 # imm = 0x1FFFFFFFFF -; X64-AVX2-NEXT: movq %rsi, %r10 -; X64-AVX2-NEXT: andq %r11, %r10 -; X64-AVX2-NEXT: movl $37, %r8d -; X64-AVX2-NEXT: subq %rdx, %r8 -; X64-AVX2-NEXT: andq %r11, %r9 +; X64-AVX2-NEXT: movq %rdx, %r10 +; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: movq %rsi, %r11 +; X64-AVX2-NEXT: andq %r8, %r11 +; X64-AVX2-NEXT: movl $37, %r9d +; X64-AVX2-NEXT: subq %rdx, %r9 +; X64-AVX2-NEXT: andq %r8, %r10 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rbx # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %r9, %rax +; X64-AVX2-NEXT: movq %r10, %rax ; X64-AVX2-NEXT: mulq %rbx ; X64-AVX2-NEXT: shrq $5, %rdx -; X64-AVX2-NEXT: imulq $37, %rdx, %rax -; X64-AVX2-NEXT: subq %rax, %r9 -; X64-AVX2-NEXT: movl %r9d, %ecx -; X64-AVX2-NEXT: shrq %cl, %r10 +; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax +; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax +; X64-AVX2-NEXT: subq %rax, %r10 +; X64-AVX2-NEXT: movl %r10d, %ecx +; X64-AVX2-NEXT: shrq %cl, %r11 +; X64-AVX2-NEXT: andq %r9, %r8 ; X64-AVX2-NEXT: movq %r8, %rax -; X64-AVX2-NEXT: andq %r11, %rax ; X64-AVX2-NEXT: mulq %rbx -; X64-AVX2-NEXT: shrl $5, %edx -; X64-AVX2-NEXT: imull $37, %edx, %eax -; X64-AVX2-NEXT: subl %eax, %r8d -; X64-AVX2-NEXT: movl %r8d, %ecx +; X64-AVX2-NEXT: shrq $5, %rdx +; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax +; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax +; X64-AVX2-NEXT: subl %eax, %r9d +; X64-AVX2-NEXT: movl %r9d, %ecx ; X64-AVX2-NEXT: shlq %cl, %rdi -; X64-AVX2-NEXT: orq %r10, %rdi -; X64-AVX2-NEXT: testq %r9, %r9 +; X64-AVX2-NEXT: orq %r11, %rdi +; X64-AVX2-NEXT: testq %r10, %r10 ; X64-AVX2-NEXT: cmoveq %rsi, %rdi ; X64-AVX2-NEXT: movq %rdi, %rax ; X64-AVX2-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/mul-constant-i16.ll b/llvm/test/CodeGen/X86/mul-constant-i16.ll index 45e7df2..d02086f 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i16.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i16.ll @@ -642,13 +642,17 @@ define i16 @test_mul_by_32(i16 %x) { define i16 @test_mul_by_37(i16 %x) { ; X86-LABEL: test_mul_by_37: ; X86: # %bb.0: -; X86-NEXT: imull $37, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_37: ; X64: # %bb.0: -; X64-NEXT: imull $37, %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 37 @@ -658,13 +662,17 @@ define i16 @test_mul_by_37(i16 %x) { define i16 @test_mul_by_41(i16 %x) { ; X86-LABEL: test_mul_by_41: ; X86: # %bb.0: -; X86-NEXT: imull $41, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_41: ; X64: # %bb.0: -; X64-NEXT: imull $41, %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,8), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 41 @@ -697,13 +705,17 @@ define i16 @test_mul_by_62(i16 %x) { define i16 @test_mul_by_73(i16 %x) { ; X86-LABEL: test_mul_by_73: ; X86: # %bb.0: -; X86-NEXT: imull $73, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_73: ; X64: # %bb.0: -; X64-NEXT: imull $73, %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rdi,%rax,8), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 73 diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll index 2be8468..3c0c6c8 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i32.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll @@ -1679,17 +1679,23 @@ define i32 @test_mul_by_32(i32 %x) { define i32 @test_mul_by_37(i32 %x) { ; X86-LABEL: test_mul_by_37: ; X86: # %bb.0: -; X86-NEXT: imull $37, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_37: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imull $37, %edi, %eax # sched: [3:1.00] +; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_37: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imull $37, %edi, %eax # sched: [3:1.00] +; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_37: @@ -1723,17 +1729,23 @@ define i32 @test_mul_by_37(i32 %x) { define i32 @test_mul_by_41(i32 %x) { ; X86-LABEL: test_mul_by_41: ; X86: # %bb.0: -; X86-NEXT: imull $41, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_41: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imull $41, %edi, %eax # sched: [3:1.00] +; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_41: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imull $41, %edi, %eax # sched: [3:1.00] +; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] +; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_41: @@ -1824,17 +1836,23 @@ define i32 @test_mul_by_62(i32 %x) { define i32 @test_mul_by_73(i32 %x) { ; X86-LABEL: test_mul_by_73: ; X86: # %bb.0: -; X86-NEXT: imull $73, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_73: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imull $73, %edi, %eax # sched: [3:1.00] +; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_73: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imull $73, %edi, %eax # sched: [3:1.00] +; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] +; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_73: diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll index e3ee6b4..d6e4ad3 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i64.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll @@ -1764,20 +1764,24 @@ define i64 @test_mul_by_32(i64 %x) { define i64 @test_mul_by_37(i64 %x) { ; X86-LABEL: test_mul_by_37: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $37, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $37, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_37: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_37: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imulq $37, %rdi, %rax # sched: [6:4.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_37: @@ -1814,20 +1818,24 @@ define i64 @test_mul_by_37(i64 %x) { define i64 @test_mul_by_41(i64 %x) { ; X86-LABEL: test_mul_by_41: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %ecx ; X86-NEXT: movl $41, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $41, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_41: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_41: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imulq $41, %rdi, %rax # sched: [6:4.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_41: @@ -1927,20 +1935,24 @@ define i64 @test_mul_by_62(i64 %x) { define i64 @test_mul_by_73(i64 %x) { ; X86-LABEL: test_mul_by_73: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,8), %ecx ; X86-NEXT: movl $73, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $73, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_73: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_73: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: imulq $73, %rdi, %rax # sched: [6:4.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_73: -- 2.7.4