From 4af5b23db308c89edeb9fdc6dfbe7e6457b22f1d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 25 Dec 2019 09:57:44 -0800
Subject: [PATCH] [X86FixupSetCC] Remember the preceding eflags defining
 instruction while we're scanning the basic block instead of looking back
 for it.

Summary:
We're already scanning forward through the basic block. Might as well
just remember eflags defs instead of doing a bounded search backwards
later.

Based on a comment in D71841.

Reviewers: RKSimon, spatel, uweigand

Reviewed By: uweigand

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71865
---
 llvm/lib/Target/X86/X86FixupSetCC.cpp           | 32 ++-------
 llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll | 86 +++++++++++++------------
 2 files changed, 49 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 8ecdce4..924f429 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -43,14 +43,6 @@ public:
   bool runOnMachineFunction(MachineFunction &MF) override;
 
 private:
-  // Find the preceding instruction that imp-defs eflags.
-  MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
-                                MachineBasicBlock::reverse_iterator MI);
-
-  // Return true if this is the opcode of a SetCC instruction with a register
-  // output.
-  bool isSetCCr(unsigned Opode);
-
   MachineRegisterInfo *MRI = nullptr;
   const X86InstrInfo *TII = nullptr;
 
@@ -64,22 +56,6 @@ char X86FixupSetCCPass::ID = 0;
 
 FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
 
-// We expect the instruction *immediately* before the setcc to imp-def
-// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
-// scheduling, look backwards until we hit the beginning of the
-// basic-block, or a small bound (to avoid quadratic behavior).
-MachineInstr *
-X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
-                                   MachineBasicBlock::reverse_iterator MI) {
-  // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
-  auto MBBStart = MBB->rend();
-  for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
-    if (MI->definesRegister(X86::EFLAGS))
-      return &*MI;
-
-  return nullptr;
-}
-
 bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   MRI = &MF.getRegInfo();
@@ -88,7 +64,12 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
   SmallVector<MachineInstr*, 4> ToErase;
 
   for (auto &MBB : MF) {
+    MachineInstr *FlagsDefMI = nullptr;
     for (auto &MI : MBB) {
+      // Remember the most recent preceding eflags defining instruction.
+      if (MI.definesRegister(X86::EFLAGS))
+        FlagsDefMI = &MI;
+
       // Find a setcc that is used by a zext.
       // This doesn't have to be the only use, the transformation is safe
       // regardless.
@@ -103,9 +84,6 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
       if (!ZExt)
         continue;
 
-      // Find the preceding instruction that imp-defs eflags.
-      MachineInstr *FlagsDefMI = findFlagsImpDef(
-          MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
       if (!FlagsDefMI)
         continue;
 
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index 6310ae3..2a51ac4 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -194,7 +194,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT:    movl %eax, %edi
+; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; AVX512VL-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm3
 ; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
 ; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm4
@@ -210,8 +210,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT:    movl %eax, %esi
-; AVX512VL-32-NEXT:    xorl %edx, %edx
+; AVX512VL-32-NEXT:    movl %eax, %edi
 ; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm3
 ; AVX512VL-32-NEXT:    setb %al
 ; AVX512VL-32-NEXT:    kmovw %eax, %k1
@@ -221,12 +220,14 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovsd %xmm3, {{[0-9]+}}(%esp)
 ; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT:    setae %dl
-; AVX512VL-32-NEXT:    shll $31, %edx
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; AVX512VL-32-NEXT:    movl $0, %eax
+; AVX512VL-32-NEXT:    setae %al
+; AVX512VL-32-NEXT:    shll $31, %eax
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    movl %eax, %esi
 ; AVX512VL-32-NEXT:    vextractf32x4 $3, %zmm0, %xmm3
 ; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT:    xorl %ecx, %ecx
+; AVX512VL-32-NEXT:    xorl %edx, %edx
 ; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm4
 ; AVX512VL-32-NEXT:    setb %al
 ; AVX512VL-32-NEXT:    kmovw %eax, %k1
@@ -236,13 +237,13 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovsd %xmm4, (%esp)
 ; AVX512VL-32-NEXT:    fldl (%esp)
 ; AVX512VL-32-NEXT:    fisttpll (%esp)
-; AVX512VL-32-NEXT:    setae %cl
-; AVX512VL-32-NEXT:    shll $31, %ecx
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT:    setae %dl
+; AVX512VL-32-NEXT:    shll $31, %edx
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; AVX512VL-32-NEXT:    xorl %eax, %eax
 ; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm3
-; AVX512VL-32-NEXT:    setb %bl
-; AVX512VL-32-NEXT:    kmovw %ebx, %k1
+; AVX512VL-32-NEXT:    setb %cl
+; AVX512VL-32-NEXT:    kmovw %ecx, %k1
 ; AVX512VL-32-NEXT:    vmovapd %xmm1, %xmm4
 ; AVX512VL-32-NEXT:    vmovsd %xmm2, %xmm4, %xmm4 {%k1}
 ; AVX512VL-32-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
@@ -252,6 +253,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    xorl %ecx, %ecx
 ; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm0
 ; AVX512VL-32-NEXT:    setb %bl
 ; AVX512VL-32-NEXT:    kmovw %ebx, %k1
@@ -263,21 +265,20 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; AVX512VL-32-NEXT:    vpinsrd $1, %esi, %xmm1, %xmm1
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX512VL-32-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512VL-32-NEXT:    vpinsrd $3, %edi, %xmm1, %xmm1
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
+; AVX512VL-32-NEXT:    vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
 ; AVX512VL-32-NEXT:    vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
-; AVX512VL-32-NEXT:    setae %al
-; AVX512VL-32-NEXT:    movzbl %al, %eax
-; AVX512VL-32-NEXT:    shll $31, %eax
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    setae %cl
+; AVX512VL-32-NEXT:    shll $31, %ecx
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm3, %xmm3
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
 ; AVX512VL-32-NEXT:    vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
 ; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
@@ -498,7 +499,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT:    movl %eax, %edi
+; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
 ; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm4
@@ -514,8 +515,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT:    movl %eax, %esi
-; AVX512VL-32-NEXT:    xorl %edx, %edx
+; AVX512VL-32-NEXT:    movl %eax, %edi
 ; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm3
 ; AVX512VL-32-NEXT:    setb %al
 ; AVX512VL-32-NEXT:    kmovw %eax, %k1
@@ -525,11 +525,13 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovss %xmm4, {{[0-9]+}}(%esp)
 ; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
 ; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT:    setae %dl
-; AVX512VL-32-NEXT:    shll $31, %edx
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; AVX512VL-32-NEXT:    movl $0, %eax
+; AVX512VL-32-NEXT:    setae %al
+; AVX512VL-32-NEXT:    shll $31, %eax
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    movl %eax, %esi
 ; AVX512VL-32-NEXT:    vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
-; AVX512VL-32-NEXT:    xorl %ecx, %ecx
+; AVX512VL-32-NEXT:    xorl %edx, %edx
 ; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm4
 ; AVX512VL-32-NEXT:    setb %al
 ; AVX512VL-32-NEXT:    kmovw %eax, %k1
@@ -539,14 +541,14 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovss %xmm4, (%esp)
 ; AVX512VL-32-NEXT:    flds (%esp)
 ; AVX512VL-32-NEXT:    fisttpll (%esp)
-; AVX512VL-32-NEXT:    setae %cl
-; AVX512VL-32-NEXT:    shll $31, %ecx
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT:    setae %dl
+; AVX512VL-32-NEXT:    shll $31, %edx
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
 ; AVX512VL-32-NEXT:    xorl %eax, %eax
 ; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm3
-; AVX512VL-32-NEXT:    setb %bl
-; AVX512VL-32-NEXT:    kmovw %ebx, %k1
+; AVX512VL-32-NEXT:    setb %cl
+; AVX512VL-32-NEXT:    kmovw %ecx, %k1
 ; AVX512VL-32-NEXT:    vmovaps %xmm1, %xmm4
 ; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm4, %xmm4 {%k1}
 ; AVX512VL-32-NEXT:    vsubss %xmm4, %xmm3, %xmm3
@@ -556,6 +558,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    setae %al
 ; AVX512VL-32-NEXT:    shll $31, %eax
 ; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    xorl %ecx, %ecx
 ; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm0
 ; AVX512VL-32-NEXT:    setb %bl
 ; AVX512VL-32-NEXT:    kmovw %ebx, %k1
@@ -567,21 +570,20 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; AVX512VL-32-NEXT:    vpinsrd $1, %esi, %xmm1, %xmm1
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX512VL-32-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512VL-32-NEXT:    vpinsrd $3, %edi, %xmm1, %xmm1
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
+; AVX512VL-32-NEXT:    vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
 ; AVX512VL-32-NEXT:    vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
-; AVX512VL-32-NEXT:    setae %al
-; AVX512VL-32-NEXT:    movzbl %al, %eax
-; AVX512VL-32-NEXT:    shll $31, %eax
-; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT:    setae %cl
+; AVX512VL-32-NEXT:    shll $31, %ecx
+; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
 ; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm3, %xmm3
 ; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
 ; AVX512VL-32-NEXT:    vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
 ; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-- 
2.7.4
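For reference, a minimal standalone C++ sketch of the two scanning strategies
this patch trades between follows. It is illustrative only: Instr is a
simplified stand-in for LLVM's MachineInstr, the function names
findFlagsDefBackward and scanBlockForward are invented for the sketch, and
the bound of 16 only mirrors the spirit of the pass's SearchBound constant;
none of this is LLVM API.

#include <cstddef>
#include <vector>

// Simplified stand-in for MachineInstr: just the two properties this
// pass cares about.
struct Instr {
  bool DefinesEFLAGS = false; // MI.definesRegister(X86::EFLAGS) in the pass
  bool IsSetCC = false;       // a setcc with a register output
};

// Old scheme (removed by the patch): once a setcc is found, walk backwards
// up to a fixed bound looking for the EFLAGS def. The bound avoids
// quadratic behavior but can make the search give up early.
const Instr *findFlagsDefBackward(const std::vector<Instr> &Block,
                                  std::size_t SetCCIdx,
                                  int SearchBound = 16) {
  for (int Steps = 0; Steps < SearchBound && SetCCIdx > 0; ++Steps) {
    --SetCCIdx;
    if (Block[SetCCIdx].DefinesEFLAGS)
      return &Block[SetCCIdx];
  }
  return nullptr;
}

// New scheme: the pass already visits every instruction in order, so it
// just remembers the most recent EFLAGS def as it goes. When a setcc shows
// up, the def is already at hand; no bound and no second walk.
void scanBlockForward(const std::vector<Instr> &Block) {
  const Instr *FlagsDefMI = nullptr;
  for (const Instr &MI : Block) {
    if (MI.DefinesEFLAGS)
      FlagsDefMI = &MI; // remember the most recent preceding eflags def
    if (MI.IsSetCC && FlagsDefMI) {
      // The real pass would attempt the setcc+zext rewrite here; it needs
      // FlagsDefMI so the zeroing mov it inserts (which itself clobbers
      // EFLAGS) can be placed before the flags definition.
    }
  }
}

int main() {
  std::vector<Instr> Block(5);
  Block[1].DefinesEFLAGS = true;
  Block[3].IsSetCC = true;
  scanBlockForward(Block); // forward scan pairs the def at 1 with the setcc at 3
  return findFlagsDefBackward(Block, 3) == &Block[1] ? 0 : 1;
}

One deliberate consequence, implied by the summary above: the forward scan
carries no search bound, so it can hand a setcc an EFLAGS def that the old
bounded backward walk would have given up before reaching.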