From: Craig Topper Date: Mon, 4 Feb 2019 18:43:55 +0000 (+0000) Subject: [X86] Add ST0 as an implicit def/use of x87 load/store instructions during FP stackif... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8ea72a82012481d34fd6e0e00c14dbb94ef9ec94;p=platform%2Fupstream%2Fllvm.git [X86] Add ST0 as an implicit def/use of x87 load/store instructions during FP stackifying. These instructions implicitly operate on ST0, but we don't currently add that information to the MachineInstr. We also don't add it the tablegen definitions either. For the most part this doesn't cause any problems because the stackifying occurs after register allocation. All the instructions are marked as having side effects so the postRA scheduler won't reorder them amongst themselves. But nothing stops inline assembly using X87 instructions from being reordered around other x87 instructions if that inline assembly wasn't marked volatile. The two test cases I've identified so far in PR40539 involve loads and stores used to set up the inline assembly or capture the results of the inline assembly ending up in the wrong order. This patch adds implicit ST0 uses/defs to the load/store instructions to prevent this from happening. I plan to fix all of the FP instructions, but the binops are bit trickier to get right. So I've chosen fixing the known test cases as a good first step. I think we also need to update the tablegen descriptions so MS inline assembly infers the right clobbers, but I haven't checked that yet. Differential Revision: https://reviews.llvm.org/D57644 llvm-svn: 353070 --- diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 452db72..bd2e8a2 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -1095,6 +1095,8 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI.RemoveOperand(0); // Remove the explicit ST(0) operand MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); + MI.addOperand( + MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true)); // Result gets pushed on the stack. pushReg(DestReg); @@ -1139,6 +1141,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { // Convert from the pseudo instruction to the concrete instruction. MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); + MI.addOperand( + MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true)); if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m || MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m || diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll index 7b979c6..f2135cd 100644 --- a/llvm/test/CodeGen/X86/pr40539.ll +++ b/llvm/test/CodeGen/X86/pr40539.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=pentium4 | FileCheck %s -; FIXME: The fstps in the following test case should be between the inline -; assembly expansion and the cmpeqss. The postRA scheduler has rearranged them. - @f1 = global float 1.000000e+00, align 4 define zeroext i1 @_Z9test_log2v() { @@ -12,13 +9,13 @@ define zeroext i1 @_Z9test_log2v() { ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: flds f1 -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: #APP ; CHECK-NEXT: fld1 ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fyl2x ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: fstps (%esp) +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cmpeqss (%esp), %xmm0 ; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: andl $1, %eax @@ -44,12 +41,12 @@ define zeroext i1 @_Z8test_cosv() { ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: #APP -; CHECK-NEXT: fcos -; CHECK-NEXT: #NO_APP ; CHECK-NEXT: divss {{\.LCPI.*}}, %xmm0 ; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: #APP +; CHECK-NEXT: fcos +; CHECK-NEXT: #NO_APP ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: ucomiss %xmm0, %xmm1