From: Craig Topper Date: Wed, 3 Aug 2022 16:37:33 +0000 (-0700) Subject: [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ff91b2d9df8098fca31684c8595087ec9f921eda;p=platform%2Fupstream%2Fllvm.git [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always. If we're going to emit a rep prefix before bsf as proposed in D130956, it makes sense to promote i16 operations to i32 to avoid the false dependency of tzcntw. Reviewed By: skan, pengfei Differential Revision: https://reviews.llvm.org/D130995 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2e98ec5..c52a964 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -368,15 +368,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // encoding. setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit + // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to + // promote that too. + setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32); + setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32); - if (Subtarget.hasBMI()) { - // Promote the i16 zero undef variant and force it on up to i32 when tzcnt - // is enabled. 
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32); - } else { - setOperationAction(ISD::CTTZ, MVT::i16, Custom); + if (!Subtarget.hasBMI()) { setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); if (Subtarget.is64Bit()) { setOperationAction(ISD::CTTZ , MVT::i64 , Custom); diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll index fc3e54e..58b894a 100644 --- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -11,12 +11,17 @@ define i16 @test__tzcnt_u16(i16 %a0) { ; X86-LABEL: test__tzcnt_u16: ; X86: # %bb.0: -; X86-NEXT: tzcntw {{[0-9]+}}(%esp), %ax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl $65536, %eax # imm = 0x10000 +; X86-NEXT: tzcntl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test__tzcnt_u16: ; X64: # %bb.0: -; X64-NEXT: tzcntw %di, %ax +; X64-NEXT: orl $65536, %edi # imm = 0x10000 +; X64-NEXT: tzcntl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %zext = zext i16 %a0 to i32 %cmp = icmp ne i32 %zext, 0 @@ -138,12 +143,17 @@ define i32 @test__tzcnt_u32(i32 %a0) { define i16 @test_tzcnt_u16(i16 %a0) { ; X86-LABEL: test_tzcnt_u16: ; X86: # %bb.0: -; X86-NEXT: tzcntw {{[0-9]+}}(%esp), %ax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl $65536, %eax # imm = 0x10000 +; X86-NEXT: tzcntl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_tzcnt_u16: ; X64: # %bb.0: -; X64-NEXT: tzcntw %di, %ax +; X64-NEXT: orl $65536, %edi # imm = 0x10000 +; X64-NEXT: tzcntl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %zext = zext i16 %a0 to i32 %cmp = icmp ne i32 %zext, 0 diff --git a/llvm/test/CodeGen/X86/clz.ll 
b/llvm/test/CodeGen/X86/clz.ll index a4c3457..ff5bfc9 100644 --- a/llvm/test/CodeGen/X86/clz.ll +++ b/llvm/test/CodeGen/X86/clz.ll @@ -46,12 +46,14 @@ define i8 @cttz_i8(i8 %x) { define i16 @cttz_i16(i16 %x) { ; X86-LABEL: cttz_i16: ; X86: # %bb.0: -; X86-NEXT: rep bsfw {{[0-9]+}}(%esp), %ax +; X86-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: cttz_i16: ; X64: # %bb.0: -; X64-NEXT: rep bsfw %di, %ax +; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i16: @@ -565,10 +567,12 @@ define i16 @cttz_i16_zero_test(i16 %n) { ; X86-NEXT: testw %ax, %ax ; X86-NEXT: je .LBB13_1 ; X86-NEXT: # %bb.2: # %cond.false -; X86-NEXT: rep bsfw %ax, %ax +; X86-NEXT: rep bsfl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; X86-NEXT: .LBB13_1: ; X86-NEXT: movw $16, %ax +; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: cttz_i16_zero_test: @@ -576,20 +580,27 @@ define i16 @cttz_i16_zero_test(i16 %n) { ; X64-NEXT: testw %di, %di ; X64-NEXT: je .LBB13_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfw %di, %ax +; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; X64-NEXT: .LBB13_1: ; X64-NEXT: movw $16, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i16_zero_test: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax +; X86-CLZ-NEXT: movl $65536, %eax # imm = 0x10000 +; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-CLZ-NEXT: tzcntl %eax, %eax +; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax ; X86-CLZ-NEXT: retl ; ; X64-CLZ-LABEL: cttz_i16_zero_test: ; X64-CLZ: # %bb.0: -; X64-CLZ-NEXT: tzcntw %di, %ax +; X64-CLZ-NEXT: orl $65536, %edi # imm = 0x10000 +; X64-CLZ-NEXT: tzcntl %edi, %eax +; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax ; 
X64-CLZ-NEXT: retq %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false) ret i16 %tmp1 diff --git a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll index f271bd0..bced63e 100644 --- a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll +++ b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll @@ -160,7 +160,9 @@ define i64 @test15_ctlz(ptr %ptr) { define i16 @test1_cttz(i16 %v) { ; CHECK-LABEL: test1_cttz: ; CHECK: # %bb.0: -; CHECK-NEXT: tzcntw %di, %ax +; CHECK-NEXT: orl $65536, %edi # imm = 0x10000 +; CHECK-NEXT: tzcntl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) %tobool = icmp eq i16 %v, 0 @@ -196,7 +198,9 @@ define i64 @test3_cttz(i64 %v) { define i16 @test4_cttz(i16 %v) { ; CHECK-LABEL: test4_cttz: ; CHECK: # %bb.0: -; CHECK-NEXT: tzcntw %di, %ax +; CHECK-NEXT: orl $65536, %edi # imm = 0x10000 +; CHECK-NEXT: tzcntl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) %tobool = icmp eq i16 0, %v @@ -232,7 +236,10 @@ define i64 @test6_cttz(i64 %v) { define i16 @test10_cttz(ptr %ptr) { ; CHECK-LABEL: test10_cttz: ; CHECK: # %bb.0: -; CHECK-NEXT: tzcntw (%rdi), %ax +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: orl $65536, %eax # imm = 0x10000 +; CHECK-NEXT: tzcntl %eax, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %v = load i16, ptr %ptr %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) @@ -271,7 +278,10 @@ define i64 @test12_cttz(ptr %ptr) { define i16 @test13_cttz(ptr %ptr) { ; CHECK-LABEL: test13_cttz: ; CHECK: # %bb.0: -; CHECK-NEXT: tzcntw (%rdi), %ax +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: orl $65536, %eax # imm = 0x10000 +; CHECK-NEXT: tzcntl %eax, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %v = load i16, ptr %ptr %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) @@ -346,7 +356,9 @@ define i64 
@test6b_ctlz(i64 %v) { define i16 @test4b_cttz(i16 %v) { ; CHECK-LABEL: test4b_cttz: ; CHECK: # %bb.0: -; CHECK-NEXT: tzcntw %di, %ax +; CHECK-NEXT: orl $65536, %edi # imm = 0x10000 +; CHECK-NEXT: tzcntl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) %tobool = icmp ne i16 %v, 0