[X86] Avoid converting u64 to f32 using x87 on Windows

author icedrocket <114203630+icedrocket@users.noreply.github.com>

Thu, 19 Jan 2023 06:35:13 +0000 (22:35 -0800)

committer Craig Topper <craig.topper@sifive.com>

Thu, 19 Jan 2023 06:41:34 +0000 (22:41 -0800)
author icedrocket <114203630+icedrocket@users.noreply.github.com>
Thu, 19 Jan 2023 06:35:13 +0000 (22:35 -0800)
committer Craig Topper <craig.topper@sifive.com>
Thu, 19 Jan 2023 06:41:34 +0000 (22:41 -0800)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index c88c66d..fb68a9c 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21898,6 +21898,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
    }
  
    assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+
+  // On Windows, the default x87 precision control is only 53 bits, so the FADD
+  // in this lowering rounds to that precision, which can make the final result
+  // less accurate. 18014397972611071 is one input that shows the problem.
+  if (Subtarget.isOSWindows())
+    return SDValue();
+
    SDValue ValueToStore = Src;
    if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
      // Bitcasting to f64 here allows us to do a single 64-bit store from
diff --git a/llvm/test/CodeGen/X86/uint64-to-float.ll b/llvm/test/CodeGen/X86/uint64-to-float.ll

index 8b66234..f44986b 100644 (file)
--- a/llvm/test/CodeGen/X86/uint64-to-float.ll
+++ b/llvm/test/CodeGen/X86/uint64-to-float.ll
@@ -1,6 +1,8 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86-WIN
+; RUN: llc < %s -mtriple=x86_64-windows -mattr=+sse2 | FileCheck %s --check-prefix=X64-WIN
  
  ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
  ; by the compiler_rt implementation of __floatundisf.
@@ -42,6 +44,36 @@ define float @test(i64 %a) nounwind {
  ; X64-NEXT:    cvtsi2ss %rdi, %xmm0
  ; X64-NEXT:    addss %xmm0, %xmm0
  ; X64-NEXT:    retq
+;
+; X86-WIN-LABEL: test:
+; X86-WIN:       # %bb.0: # %entry
+; X86-WIN-NEXT:    pushl %ebp
+; X86-WIN-NEXT:    movl %esp, %ebp
+; X86-WIN-NEXT:    andl $-8, %esp
+; X86-WIN-NEXT:    subl $8, %esp
+; X86-WIN-NEXT:    pushl 12(%ebp)
+; X86-WIN-NEXT:    pushl 8(%ebp)
+; X86-WIN-NEXT:    calll ___floatundisf
+; X86-WIN-NEXT:    addl $8, %esp
+; X86-WIN-NEXT:    movl %ebp, %esp
+; X86-WIN-NEXT:    popl %ebp
+; X86-WIN-NEXT:    retl
+;
+; X64-WIN-LABEL: test:
+; X64-WIN:       # %bb.0: # %entry
+; X64-WIN-NEXT:    testq %rcx, %rcx
+; X64-WIN-NEXT:    js .LBB0_1
+; X64-WIN-NEXT:  # %bb.2: # %entry
+; X64-WIN-NEXT:    cvtsi2ss %rcx, %xmm0
+; X64-WIN-NEXT:    retq
+; X64-WIN-NEXT:  .LBB0_1:
+; X64-WIN-NEXT:    movq %rcx, %rax
+; X64-WIN-NEXT:    shrq %rax
+; X64-WIN-NEXT:    andl $1, %ecx
+; X64-WIN-NEXT:    orq %rax, %rcx
+; X64-WIN-NEXT:    cvtsi2ss %rcx, %xmm0
+; X64-WIN-NEXT:    addss %xmm0, %xmm0
+; X64-WIN-NEXT:    retq
  entry:
    %b = uitofp i64 %a to float
    ret float %b
author	icedrocket <114203630+icedrocket@users.noreply.github.com>
	Thu, 19 Jan 2023 06:35:13 +0000 (22:35 -0800)
committer	Craig Topper <craig.topper@sifive.com>
	Thu, 19 Jan 2023 06:41:34 +0000 (22:41 -0800)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/uint64-to-float.ll		patch \| blob \| history